Directory full of pixel data scaling code that will eventually migrate
authorOwen Taylor <otaylor@redhat.com>
Wed, 5 Jan 2000 21:33:58 +0000 (21:33 +0000)
committerOwen Taylor <otaylor@src.gnome.org>
Wed, 5 Jan 2000 21:33:58 +0000 (21:33 +0000)
2000-01-05  Owen Taylor  <otaylor@redhat.com>

* gdk-pixbuf/pixops/: Directory full of pixel data scaling
code that will eventually migrate into libart.

* configure.in acconfig.h: Add checks for MMX compiler support

* gdk-pixbuf/gdk-pixbuf.h gdk-pixbuf/gdk-pixbuf-scale.c:
Nice wrapper routines for the code in pixops that operate
on pixbufs instead of raw data.

* gdk-pixbuf/testpixbuf-scale: Test program for scaling
routines.

16 files changed:
demos/testpixbuf-scale.c [new file with mode: 0644]
gdk-pixbuf/.cvsignore
gdk-pixbuf/ChangeLog
gdk-pixbuf/Makefile.am
gdk-pixbuf/gdk-pixbuf-scale.c [new file with mode: 0644]
gdk-pixbuf/gdk-pixbuf.h
gdk-pixbuf/pixops/.cvsignore [new file with mode: 0644]
gdk-pixbuf/pixops/Makefile.am [new file with mode: 0644]
gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S [new file with mode: 0644]
gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S [new file with mode: 0644]
gdk-pixbuf/pixops/have_mmx.S [new file with mode: 0644]
gdk-pixbuf/pixops/pixops-internal.h [new file with mode: 0644]
gdk-pixbuf/pixops/pixops.c [new file with mode: 0644]
gdk-pixbuf/pixops/pixops.h [new file with mode: 0644]
gdk-pixbuf/pixops/scale_line_22_33_mmx.S [new file with mode: 0644]
gdk-pixbuf/pixops/timescale.c [new file with mode: 0644]

diff --git a/demos/testpixbuf-scale.c b/demos/testpixbuf-scale.c
new file mode 100644 (file)
index 0000000..c2891bd
--- /dev/null
@@ -0,0 +1,163 @@
+#include <gtk/gtk.h>
+#include "gdk-pixbuf.h"
+
+#include <stdio.h>
+
+ArtFilterLevel filter_level = ART_FILTER_BILINEAR;
+int overall_alpha = 255;
+GdkPixbuf *pixbuf;
+GtkWidget *darea;
+  
+/* "activate" handler for the filter menu items.  DATA carries the
+ * ArtFilterLevel to switch to, packed with GUINT_TO_POINTER; it becomes the
+ * current filter level and the drawing area is queued for redraw.
+ */
+void
+set_filter_level (GtkWidget *widget, gpointer data)
+{
+  guint requested_level = GPOINTER_TO_UINT (data);
+
+  filter_level = requested_level;
+  gtk_widget_queue_draw (darea);
+}
+
+/* "value_changed" handler for the overall-alpha adjustment.  Copies the
+ * adjustment's value into overall_alpha and queues a redraw of the drawing
+ * area, but only when the value actually differs from the stored one.
+ */
+void
+overall_changed_cb (GtkAdjustment *adjustment, gpointer data)
+{
+  /* Nothing to do when the stored alpha already matches */
+  if (adjustment->value == overall_alpha)
+    return;
+
+  overall_alpha = adjustment->value;
+  gtk_widget_queue_draw (darea);
+}
+
+/* "expose_event" handler for the drawing area.
+ *
+ * Scales the image so that it fills the widget's allocation, renders the
+ * exposed rectangle of that scaled image into a temporary RGB pixbuf and
+ * draws the pixbuf into the widget's window.  When the image has an alpha
+ * channel, or overall_alpha is below 255, the image is composited against a
+ * checkerboard instead of being scaled directly.
+ *
+ * Always returns TRUE.
+ */
+gboolean
+expose_cb (GtkWidget *widget, GdkEventExpose *event, gpointer data)
+{
+  GdkPixbuf *dest;
+  double scale_x, scale_y;
+
+  dest = gdk_pixbuf_new (ART_PIX_RGB, FALSE, 8, event->area.width, event->area.height);
+
+  /* gdk_pixbuf_new() may hand back NULL (e.g. the pixel buffer could not be
+   * allocated); there is nothing to draw into in that case.
+   */
+  if (!dest)
+    return TRUE;
+
+  /* Factors that stretch the whole image over the whole widget */
+  scale_x = (double) widget->allocation.width / pixbuf->art_pixbuf->width;
+  scale_y = (double) widget->allocation.height / pixbuf->art_pixbuf->height;
+
+  /* dest only covers the exposed area, so the scaled image is shifted by the
+   * negated origin of that area.
+   */
+  if (pixbuf->art_pixbuf->has_alpha || overall_alpha != 255)
+    gdk_pixbuf_composite_color (pixbuf, dest,
+                               0, 0, event->area.width, event->area.height,
+                               -event->area.x, -event->area.y,
+                               scale_x, scale_y,
+                               filter_level, overall_alpha,
+                               event->area.x, event->area.y, 16, 0xaaaaaa, 0x555555);
+  else
+    gdk_pixbuf_scale (pixbuf, dest,
+                     0, 0, event->area.width, event->area.height,
+                     -event->area.x, -event->area.y,
+                     scale_x, scale_y,
+                     filter_level);
+
+  gdk_pixbuf_render_to_drawable (dest, widget->window, widget->style->fg_gc[GTK_STATE_NORMAL],
+                                0, 0, event->area.x, event->area.y,
+                                event->area.width, event->area.height,
+                                GDK_RGB_DITHER_NORMAL, event->area.x, event->area.y);
+
+  gdk_pixbuf_unref (dest);
+
+  return TRUE;
+}
+
+/* Appends a menu item labelled LABEL to MENU.  Activating the item calls
+ * set_filter_level() with LEVEL.  The item is shown before it is added.
+ */
+static void
+add_filter_item (GtkWidget *menu, const char *label, ArtFilterLevel level)
+{
+       GtkWidget *menuitem;
+
+       menuitem = gtk_menu_item_new_with_label (label);
+       gtk_signal_connect (GTK_OBJECT (menuitem), "activate",
+                           GTK_SIGNAL_FUNC (set_filter_level),
+                           GUINT_TO_POINTER (level));
+       gtk_widget_show (menuitem);
+       gtk_container_add (GTK_CONTAINER (menu), menuitem);
+}
+
+/* Test program: loads the image named on the command line and shows it in a
+ * resizable window together with a filter-level option menu and an
+ * overall-alpha slider.  Returns 1 on a usage or load error, 0 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+       GtkWidget *window, *vbox;
+       GtkWidget *optionmenu, *menu;
+       GtkWidget *alignment;
+       GtkWidget *hbox, *label, *hscale;
+       GtkAdjustment *adjustment;
+       GtkRequisition scratch_requisition;
+
+       gtk_init (&argc, &argv);
+       gdk_rgb_init ();
+
+       /* Failures return from main() rather than calling exit(): this file
+        * does not include <stdlib.h>, so exit() would be undeclared.
+        */
+       if (argc != 2) {
+               fprintf (stderr, "Usage: testpixbuf-scale FILE\n");
+               return 1;
+       }
+
+       pixbuf = gdk_pixbuf_new_from_file (argv[1]);
+       if (!pixbuf) {
+               fprintf (stderr, "Cannot load %s\n", argv[1]);
+               return 1;
+       }
+
+       window = gtk_window_new (GTK_WINDOW_TOPLEVEL);
+       gtk_signal_connect (GTK_OBJECT (window), "destroy",
+                           GTK_SIGNAL_FUNC (gtk_main_quit), NULL);
+
+       vbox = gtk_vbox_new (FALSE, 0);
+       gtk_container_add (GTK_CONTAINER (window), vbox);
+
+       menu = gtk_menu_new ();
+       add_filter_item (menu, "NEAREST", ART_FILTER_NEAREST);
+       add_filter_item (menu, "BILINEAR", ART_FILTER_BILINEAR);
+       add_filter_item (menu, "TILES", ART_FILTER_TILES);
+       add_filter_item (menu, "HYPER", ART_FILTER_HYPER);
+
+       optionmenu = gtk_option_menu_new ();
+       gtk_option_menu_set_menu (GTK_OPTION_MENU (optionmenu), menu);
+       /* Entry 1 is BILINEAR, matching the initial value of filter_level */
+       gtk_option_menu_set_history (GTK_OPTION_MENU (optionmenu), 1);
+
+       alignment = gtk_alignment_new (0.0, 0.0, 0.0, 0.5);
+       gtk_box_pack_start (GTK_BOX (vbox), alignment, FALSE, FALSE, 0);
+
+       hbox = gtk_hbox_new (FALSE, 4);
+       gtk_box_pack_start (GTK_BOX (vbox), hbox, FALSE, FALSE, 0);
+
+       label = gtk_label_new ("Overall Alpha:");
+       gtk_box_pack_start (GTK_BOX (hbox), label, FALSE, FALSE, 0);
+
+       adjustment = GTK_ADJUSTMENT (gtk_adjustment_new (overall_alpha, 0, 255, 1, 10, 0));
+       gtk_signal_connect (GTK_OBJECT (adjustment), "value_changed",
+                           GTK_SIGNAL_FUNC (overall_changed_cb), NULL);
+
+       hscale = gtk_hscale_new (adjustment);
+       gtk_scale_set_digits (GTK_SCALE (hscale), 0);
+       gtk_box_pack_start (GTK_BOX (hbox), hscale, TRUE, TRUE, 0);
+
+       gtk_container_add (GTK_CONTAINER (alignment), optionmenu);
+       gtk_widget_show_all (vbox);
+
+       /* Compute the size without the drawing area, so we know how big to make the default size */
+       gtk_widget_size_request (vbox, &scratch_requisition);
+
+       darea = gtk_drawing_area_new ();
+       gtk_box_pack_start (GTK_BOX (vbox), darea, TRUE, TRUE, 0);
+
+       gtk_signal_connect (GTK_OBJECT (darea), "expose_event",
+                           GTK_SIGNAL_FUNC (expose_cb), NULL);
+
+       /* Default size: the controls plus the image at its natural size */
+       gtk_window_set_default_size (GTK_WINDOW (window),
+                                    pixbuf->art_pixbuf->width,
+                                    scratch_requisition.height + pixbuf->art_pixbuf->height);
+
+       gtk_widget_show_all (window);
+
+       gtk_main ();
+
+       return 0;
+}
index c5efdf6c68a840699ce9d59b4e6b932e7db050e2..6f2afc36013b9189df402ccfbf8c9944f55148cd 100644 (file)
@@ -7,4 +7,5 @@ Makefile
 *.lo
 testpixbuf
 testpixbuf-drawable
+testpixbuf-scale
 testanimation
index 3bb68cd30e0de97430e12762661949721e75cafd..8cfd7be4cd71d29898fd17ced38cdf1f73c4e2f2 100644 (file)
@@ -1,3 +1,17 @@
+2000-01-05  Owen Taylor  <otaylor@redhat.com>
+
+       * gdk-pixbuf/pixops/: Directory full of pixel data scaling
+       code that will eventually migrate into libart.
+
+       * configure.in acconfig.h: Add checks for MMX compiler support
+
+       * gdk-pixbuf/gdk-pixbuf.h gdk-pixbuf/gdk-pixbuf-scale.c:
+       Nice wrapper routines for the code in pixops that operate
+       on pixbufs instead of raw data.
+       
+       * gdk-pixbuf/testpixbuf-scale: Test program for scaling
+       routines.
+       
 2000-01-05  Jonathan Blandford  <jrb@redhat.com>
 
        * doc/tmpl/animation.sgml: Documentation changes.
index 3dd9bf9f609285d1fe0b462f4eb28ae7eaffe587..bf8caded640f4a25e86232dfc9fd1fe5c9c108ed 100644 (file)
@@ -1,3 +1,5 @@
+SUBDIRS = pixops
+
 lib_LTLIBRARIES =              \
        libgdk_pixbuf.la
 
@@ -38,7 +40,7 @@ libexec_LTLIBRARIES =         \
        $(PNM_LIB)      \
        $(BMP_LIB)
 
-noinst_PROGRAMS = testpixbuf testpixbuf-drawable testanimation
+noinst_PROGRAMS = testpixbuf testpixbuf-drawable testanimation testpixbuf-scale
 
 DEPS = libgdk_pixbuf.la
 INCLUDES = -I$(top_srcdir) -I$(top_builddir) \
@@ -52,10 +54,12 @@ LDADDS = libgdk_pixbuf.la $(LIBART_LIBS) $(GLIB_LIBS) $(GTK_LIBS)
 if INSIDE_GNOME_LIBS
 testpixbuf_LDADD = $(LDADDS) $(LIBART_LIBS) -lgmodule
 testpixbuf_drawable_LDADD = $(LDADDS)
+testpixbuf_scale_LDADD = $(LDADDS)
 testanimation_LDADD = $(LDADDS) $(LIBART_LIBS) -lgmodule
 else
 testpixbuf_LDADD = $(LDADDS) $(LIBART_LIBS) $(GNOME_LIBS) -lgmodule
 testpixbuf_drawable_LDADD = $(LDADDS) $(GNOME_LIBS)
+testpixbuf_scale_LDADD = $(LDADDS) $(GNOME_LIBS)
 testanimation_LDADD = $(LDADDS) $(LIBART_LIBS) $(GNOME_LIBS) -lgmodule
 endif
 
@@ -83,6 +87,7 @@ libgdk_pixbuf_la_SOURCES =    \
        gdk-pixbuf-io.c         \
        gdk-pixbuf-loader.c     \
        gdk-pixbuf-render.c     \
+       gdk-pixbuf-scale.c      \
        gdk-pixbuf-util.c       \
        $(CANVAS_SOURCEFILES)
 
@@ -93,6 +98,8 @@ EXTRA_GNOME_LIBS = $(GNOME_LIBS)
 endif
 
 libgdk_pixbuf_la_LDFLAGS = -version-info 1:0:0 $(EXTRA_GNOME_LIBS)
+libgdk_pixbuf_la_LIBADD = pixops/libpixops.la
+
 
 libgdk_pixbufinclude_HEADERS = \
        gdk-pixbuf.h            \
diff --git a/gdk-pixbuf/gdk-pixbuf-scale.c b/gdk-pixbuf/gdk-pixbuf-scale.c
new file mode 100644 (file)
index 0000000..6e0861f
--- /dev/null
@@ -0,0 +1,214 @@
+#include "gdk-pixbuf.h"
+#include "pixops/pixops.h"
+#include "math.h"
+
+/**
+ * gdk_pixbuf_scale:
+ * @src: a #GdkPixbuf
+ * @dest: the #GdkPixbuf into which to render the results
+ * @dest_x: the left coordinate of the region of @dest to render into
+ * @dest_y: the top coordinate of the region of @dest to render into
+ * @dest_width: the width of the region to render
+ * @dest_height: the height of the region to render
+ * @offset_x: the offset in the X direction (currently rounded to an integer)
+ * @offset_y: the offset in the Y direction (currently rounded to an integer)
+ * @scale_x: the scale factor in the X direction
+ * @scale_y: the scale factor in the Y direction
+ * @filter_level: the filter quality for the transformation.
+ * 
+ * Transforms the source image by scaling by @scale_x and @scale_y then
+ * translating by @offset_x and @offset_y, then renders the rectangle
+ * (@dest_x, @dest_y, @dest_width, @dest_height) of the resulting image onto the
+ * destination pixbuf, replacing the previous contents.
+ *
+ * The rectangle must lie entirely inside @dest; otherwise the call is
+ * rejected with a warning and @dest is left untouched.
+ **/
+void
+gdk_pixbuf_scale (GdkPixbuf      *src,
+                 GdkPixbuf      *dest,
+                 int             dest_x,
+                 int             dest_y,
+                 int             dest_width,
+                 int             dest_height,
+                 double          offset_x,
+                 double          offset_y,
+                 double          scale_x,
+                 double          scale_y,
+                 ArtFilterLevel  filter_level)
+{
+  g_return_if_fail (src != NULL);
+  g_return_if_fail (dest != NULL);
+  /* The pixel pointer below is computed with raw arithmetic, so the region
+   * has to be checked against the destination before anything is written.
+   */
+  g_return_if_fail (dest_width >= 0 && dest_height >= 0);
+  g_return_if_fail (dest_x >= 0 && dest_x + dest_width <= dest->art_pixbuf->width);
+  g_return_if_fail (dest_y >= 0 && dest_y + dest_height <= dest->art_pixbuf->height);
+
+  offset_x = floor (offset_x + 0.5);
+  offset_y = floor (offset_y + 0.5);
+
+  /* The destination pointer is advanced to (dest_x, dest_y), so the rectangle
+   * of the scaled image handed to pixops must start at that same point,
+   * expressed relative to the (rounded) translation.
+   */
+  pixops_scale (dest->art_pixbuf->pixels + dest_y * dest->art_pixbuf->rowstride + dest_x * dest->art_pixbuf->n_channels,
+               dest_x - offset_x, dest_y - offset_y,
+               dest_x + dest_width - offset_x, dest_y + dest_height - offset_y,
+               dest->art_pixbuf->rowstride, dest->art_pixbuf->n_channels, dest->art_pixbuf->has_alpha,
+               src->art_pixbuf->pixels, src->art_pixbuf->width, src->art_pixbuf->height,
+               src->art_pixbuf->rowstride, src->art_pixbuf->n_channels, src->art_pixbuf->has_alpha,
+               scale_x, scale_y, filter_level);
+}
+
+/**
+ * gdk_pixbuf_composite:
+ * @src: a #GdkPixbuf
+ * @dest: the #GdkPixbuf into which to render the results
+ * @dest_x: the left coordinate of the region of @dest to render into
+ * @dest_y: the top coordinate of the region of @dest to render into
+ * @dest_width: the width of the region to render
+ * @dest_height: the height of the region to render
+ * @offset_x: the offset in the X direction (currently rounded to an integer)
+ * @offset_y: the offset in the Y direction (currently rounded to an integer)
+ * @scale_x: the scale factor in the X direction
+ * @scale_y: the scale factor in the Y direction
+ * @filter_level: the filter quality for the transformation.
+ * @overall_alpha: overall alpha for source image (0..255)
+ * 
+ * Transforms the source image by scaling by @scale_x and @scale_y then
+ * translating by @offset_x and @offset_y, then composites the rectangle
+ * (@dest_x, @dest_y, @dest_width, @dest_height) of the resulting image onto the
+ * destination pixbuf.
+ *
+ * The rectangle must lie entirely inside @dest and @overall_alpha must be
+ * within 0..255; otherwise the call is rejected with a warning and @dest is
+ * left untouched.
+ **/
+void
+gdk_pixbuf_composite (GdkPixbuf      *src,
+                     GdkPixbuf      *dest,
+                     int             dest_x,
+                     int             dest_y,
+                     int             dest_width,
+                     int             dest_height,
+                     double          offset_x,
+                     double          offset_y,
+                     double          scale_x,
+                     double          scale_y,
+                     ArtFilterLevel  filter_level,
+                     int             overall_alpha)
+{
+  g_return_if_fail (src != NULL);
+  g_return_if_fail (dest != NULL);
+  /* The pixel pointer below is computed with raw arithmetic, so the region
+   * has to be checked against the destination before anything is written.
+   */
+  g_return_if_fail (dest_width >= 0 && dest_height >= 0);
+  g_return_if_fail (dest_x >= 0 && dest_x + dest_width <= dest->art_pixbuf->width);
+  g_return_if_fail (dest_y >= 0 && dest_y + dest_height <= dest->art_pixbuf->height);
+  g_return_if_fail (overall_alpha >= 0 && overall_alpha <= 255);
+
+  offset_x = floor (offset_x + 0.5);
+  offset_y = floor (offset_y + 0.5);
+
+  /* The destination pointer is advanced to (dest_x, dest_y), so the rectangle
+   * of the scaled image handed to pixops must start at that same point,
+   * expressed relative to the (rounded) translation.
+   */
+  pixops_composite (dest->art_pixbuf->pixels + dest_y * dest->art_pixbuf->rowstride + dest_x * dest->art_pixbuf->n_channels,
+                   dest_x - offset_x, dest_y - offset_y,
+                   dest_x + dest_width - offset_x, dest_y + dest_height - offset_y,
+                   dest->art_pixbuf->rowstride, dest->art_pixbuf->n_channels, dest->art_pixbuf->has_alpha,
+                   src->art_pixbuf->pixels, src->art_pixbuf->width, src->art_pixbuf->height,
+                   src->art_pixbuf->rowstride, src->art_pixbuf->n_channels, src->art_pixbuf->has_alpha,
+                   scale_x, scale_y, filter_level, overall_alpha);
+}
+
+/**
+ * gdk_pixbuf_composite_color:
+ * @src: a #GdkPixbuf
+ * @dest: the #GdkPixbuf into which to render the results
+ * @dest_x: the left coordinate of the region of @dest to render into
+ * @dest_y: the top coordinate of the region of @dest to render into
+ * @dest_width: the width of the region to render
+ * @dest_height: the height of the region to render
+ * @offset_x: the offset in the X direction (currently rounded to an integer)
+ * @offset_y: the offset in the Y direction (currently rounded to an integer)
+ * @scale_x: the scale factor in the X direction
+ * @scale_y: the scale factor in the Y direction
+ * @filter_level: the filter quality for the transformation.
+ * @overall_alpha: overall alpha for source image (0..255)
+ * @check_x: the X offset for the checkboard (origin of checkboard is at -@check_x, -@check_y)
+ * @check_y: the Y offset for the checkboard 
+ * @check_size: the size of checks in the checkboard (must be a power of two)
+ * @color1: the color of check at upper left
+ * @color2: the color of the other check
+ * 
+ * Transforms the source image by scaling by @scale_x and @scale_y then
+ * translating by @offset_x and @offset_y, then composites the rectangle
+ * (@dest_x, @dest_y, @dest_width, @dest_height) of the resulting image with
+ * a checkboard of the colors @color1 and @color2 and renders it onto the
+ * destination pixbuf.
+ *
+ * The rectangle must lie entirely inside @dest and @overall_alpha must be
+ * within 0..255; otherwise the call is rejected with a warning and @dest is
+ * left untouched.
+ **/
+void
+gdk_pixbuf_composite_color (GdkPixbuf      *src,
+                           GdkPixbuf      *dest,
+                           int             dest_x,
+                           int             dest_y,
+                           int             dest_width,
+                           int             dest_height,
+                           double          offset_x,
+                           double          offset_y,
+                           double          scale_x,
+                           double          scale_y,
+                           ArtFilterLevel  filter_level,
+                           int             overall_alpha,
+                           int             check_x,
+                           int             check_y,
+                           int             check_size,
+                           art_u32         color1,
+                           art_u32         color2)
+{
+  g_return_if_fail (src != NULL);
+  g_return_if_fail (dest != NULL);
+  /* The pixel pointer below is computed with raw arithmetic, so the region
+   * has to be checked against the destination before anything is written.
+   */
+  g_return_if_fail (dest_width >= 0 && dest_height >= 0);
+  g_return_if_fail (dest_x >= 0 && dest_x + dest_width <= dest->art_pixbuf->width);
+  g_return_if_fail (dest_y >= 0 && dest_y + dest_height <= dest->art_pixbuf->height);
+  g_return_if_fail (overall_alpha >= 0 && overall_alpha <= 255);
+
+  offset_x = floor (offset_x + 0.5);
+  offset_y = floor (offset_y + 0.5);
+
+  /* The destination pointer is advanced to (dest_x, dest_y), so the rectangle
+   * of the scaled image handed to pixops must start at that same point,
+   * expressed relative to the (rounded) translation.
+   */
+  pixops_composite_color (dest->art_pixbuf->pixels + dest_y * dest->art_pixbuf->rowstride + dest_x * dest->art_pixbuf->n_channels,
+                         dest_x - offset_x, dest_y - offset_y,
+                         dest_x + dest_width - offset_x, dest_y + dest_height - offset_y,
+                         dest->art_pixbuf->rowstride, dest->art_pixbuf->n_channels, dest->art_pixbuf->has_alpha,
+                         src->art_pixbuf->pixels, src->art_pixbuf->width, src->art_pixbuf->height,
+                         src->art_pixbuf->rowstride, src->art_pixbuf->n_channels, src->art_pixbuf->has_alpha,
+                         scale_x, scale_y, filter_level, overall_alpha, check_x, check_y, check_size, color1, color2);
+}
+
+/**
+ * gdk_pixbuf_scale_simple:
+ * @src: a #GdkPixbuf
+ * @dest_width: the width of destination image
+ * @dest_height: the height of destination image
+ * @filter_level: the filter quality for the transformation.
+ * 
+ * Scales the #GdkPixbuf @src to @dest_width x @dest_height and renders the
+ * result into a new #GdkPixbuf.  The new pixbuf has an alpha channel exactly
+ * when @src has one.
+ * 
+ * Return value: the new #GdkPixbuf, or %NULL if the arguments are invalid or
+ * the new pixbuf could not be created.
+ **/
+GdkPixbuf *
+gdk_pixbuf_scale_simple (GdkPixbuf      *src,
+                        int             dest_width,
+                        int             dest_height,
+                        ArtFilterLevel  filter_level)
+{
+  GdkPixbuf *dest;
+
+  g_return_val_if_fail (src != NULL, NULL);
+  g_return_val_if_fail (dest_width > 0, NULL);
+  g_return_val_if_fail (dest_height > 0, NULL);
+
+  dest = gdk_pixbuf_new (ART_PIX_RGB, src->art_pixbuf->has_alpha, 8, dest_width, dest_height);
+  /* Creation can fail; do not hand a NULL destination to the scaler */
+  if (!dest)
+    return NULL;
+
+  /* Scale factors map the full source onto the full destination */
+  gdk_pixbuf_scale (src, dest,  0, 0, dest_width, dest_height, 0, 0,
+                   (double)dest_width / src->art_pixbuf->width,
+                   (double)dest_height / src->art_pixbuf->height,
+                   filter_level);
+
+  return dest;
+}
+
+/**
+ * gdk_pixbuf_composite_color_simple:
+ * @src: a #GdkPixbuf
+ * @dest_width: the width of destination image
+ * @dest_height: the height of destination image
+ * @filter_level: the filter quality for the transformation.
+ * @overall_alpha: overall alpha for source image (0..255)
+ * @check_size: the size of checks in the checkboard (must be a power of two)
+ * @color1: the color of check at upper left
+ * @color2: the color of the other check
+ * 
+ * Scales the #GdkPixbuf @src to @dest_width x @dest_height, composites the
+ * result with a checkboard of colors @color1 and @color2 and renders it into
+ * a new #GdkPixbuf.
+ * 
+ * Return value: the new #GdkPixbuf, or %NULL if the arguments are invalid or
+ * the new pixbuf could not be created.
+ **/
+GdkPixbuf *
+gdk_pixbuf_composite_color_simple (GdkPixbuf      *src,
+                                  int             dest_width,
+                                  int             dest_height,
+                                  ArtFilterLevel  filter_level,
+                                  int             overall_alpha,
+                                  int             check_size,
+                                  art_u32         color1,
+                                  art_u32         color2)
+{
+  GdkPixbuf *dest;
+
+  g_return_val_if_fail (src != NULL, NULL);
+  g_return_val_if_fail (dest_width > 0, NULL);
+  g_return_val_if_fail (dest_height > 0, NULL);
+  g_return_val_if_fail (overall_alpha >= 0 && overall_alpha <= 255, NULL);
+
+  dest = gdk_pixbuf_new (ART_PIX_RGB, src->art_pixbuf->has_alpha, 8, dest_width, dest_height);
+  /* Creation can fail; do not hand a NULL destination to the compositor */
+  if (!dest)
+    return NULL;
+
+  /* Scale factors map the full source onto the full destination; the
+   * checkboard origin is left at (0, 0).
+   */
+  gdk_pixbuf_composite_color (src, dest, 0, 0, dest_width, dest_height, 0, 0,
+                             (double)dest_width / src->art_pixbuf->width,
+                             (double)dest_height / src->art_pixbuf->height,
+                             filter_level, overall_alpha, 0, 0, check_size, color1, color2);
+
+  return dest;
+}
+
+
+
index 7a30f3c8eda0ba4351b20bf849248e63e6a77c0d..28e7f4da64712b17f21e593d268775f5716030f2 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <libart_lgpl/art_misc.h>
 #include <libart_lgpl/art_pixbuf.h>
+#include <libart_lgpl/art_filterlevel.h>
 #include <gdk/gdk.h>
 
 #ifdef __cplusplus
@@ -162,6 +163,62 @@ GdkPixbuf *gdk_pixbuf_get_from_drawable (GdkPixbuf *dest,
                                         int dest_x, int dest_y,
                                         int width, int height);
 
+/* Scaling */
+
+void gdk_pixbuf_scale           (GdkPixbuf      *src,
+                                GdkPixbuf      *dest,
+                                int             dest_x,
+                                int             dest_y,
+                                int             dest_width,
+                                int             dest_height,
+                                double          offset_x,
+                                double          offset_y,
+                                double          scale_x,
+                                double          scale_y,
+                                ArtFilterLevel  filter_level);
+void gdk_pixbuf_composite       (GdkPixbuf      *src,
+                                GdkPixbuf      *dest,
+                                int             dest_x,
+                                int             dest_y,
+                                int             dest_width,
+                                int             dest_height,
+                                double          offset_x,
+                                double          offset_y,
+                                double          scale_x,
+                                double          scale_y,
+                                ArtFilterLevel  filter_level,
+                                int             overall_alpha);
+void gdk_pixbuf_composite_color (GdkPixbuf      *src,
+                                GdkPixbuf      *dest,
+                                int             dest_x,
+                                int             dest_y,
+                                int             dest_width,
+                                int             dest_height,
+                                double          offset_x,
+                                double          offset_y,
+                                double          scale_x,
+                                double          scale_y,
+                                ArtFilterLevel  filter_level,
+                                int             overall_alpha,
+                                int             check_x,
+                                int             check_y,
+                                int             check_size,
+                                art_u32         color1,
+                                art_u32         color2);
+
+GdkPixbuf *gdk_pixbuf_scale_simple           (GdkPixbuf      *src,
+                                             int             dest_width,
+                                             int             dest_height,
+                                             ArtFilterLevel  filter_level);
+GdkPixbuf *gdk_pixbuf_composite_color_simple (GdkPixbuf      *src,
+                                             int             dest_width,
+                                             int             dest_height,
+                                             ArtFilterLevel  filter_level,
+                                             int             overall_alpha,
+                                             int             check_size,
+                                             art_u32         color1,
+                                             art_u32         color2);
+
 /* Animation support */
 
 GdkPixbufAnimation *gdk_pixbuf_animation_new_from_file (const char *filename);
diff --git a/gdk-pixbuf/pixops/.cvsignore b/gdk-pixbuf/pixops/.cvsignore
new file mode 100644 (file)
index 0000000..8027cd3
--- /dev/null
@@ -0,0 +1,7 @@
+Makefile
+Makefile.in
+.libs
+.deps
+*.lo
+*.la
+timescale
diff --git a/gdk-pixbuf/pixops/Makefile.am b/gdk-pixbuf/pixops/Makefile.am
new file mode 100644 (file)
index 0000000..bcb90bc
--- /dev/null
@@ -0,0 +1,22 @@
+# Convenience library; it is linked into libgdk_pixbuf and never installed.
+noinst_LTLIBRARIES = libpixops.la
+
+INCLUDES = $(GLIB_CFLAGS)
+
+# timescale is a developer tool (presumably a timing test, going by its
+# name) that links only against libpixops and GLib.  Build it, but keep it
+# out of $(bindir).
+noinst_PROGRAMS = timescale
+
+timescale_SOURCES = timescale.c
+timescale_LDADD = libpixops.la $(GLIB_LIBS)
+
+# The assembly sources are only compiled when configure enabled USE_MMX;
+# otherwise mmx_sources stays empty.
+if USE_MMX
+mmx_sources =                          \
+       have_mmx.S                      \
+       scale_line_22_33_mmx.S          \
+       composite_line_22_4a4_mmx.S     \
+       composite_line_color_22_4a4_mmx.S
+endif
+
+libpixops_la_SOURCES =                 \
+       pixops.c                        \
+       pixops.h                        \
+       pixops-internal.h               \
+       $(mmx_sources)
diff --git a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S
new file mode 100644 (file)
index 0000000..f3edc8a
--- /dev/null
@@ -0,0 +1,208 @@
+       .file   "composite_line_22_4a4_mmx.S"
+       .version        "01.01"
+gcc2_compiled.:
+.text
+       .align 16
+.globl pixops_composite_line_22_4a4_mmx
+       .type    pixops_composite_line_22_4a4_mmx,@function
+/*
+ * pixops_composite_line_22_4a4_mmx
+ *
+ * Walks the 4-byte pixels from p up to (not including) p_end, rewriting
+ * each one in place.  For every destination pixel it reads two horizontally
+ * adjacent 4-byte pixels from each of the rows q1 and q2, selected by the
+ * integer part of the 16.16 fixed-point position x, and combines them with
+ * four quadword weights selected by the top four fraction bits of x.
+ * x starts at xinit and advances by xstep per destination pixel.
+ *
+ * The loop only stops when p == p_end exactly, so p_end must be reachable
+ * from p in 4-byte steps.  The final p is returned in %eax.
+ *
+ * Arguments
+ *             
+ * weights:     8(%ebp)
+ * p:          12(%ebp)        %esi
+ * q1:         16(%ebp)        
+ * q2:         20(%ebp)        
+ * xstep:       24(%ebp)       
+ * p_end:       28(%ebp)
+ * xinit:       32(%ebp)
+ *     
+*/
+pixops_composite_line_22_4a4_mmx:
+/*
+ * Function call entry
+ *
+ * 28 bytes of locals plus three saved registers put %esp 40 bytes below
+ * %ebp; the leal at .out relies on exactly that layout.
+ */
+       pushl %ebp
+       movl %esp,%ebp
+       subl $28,%esp
+       pushl %edi
+       pushl %esi
+       pushl %ebx
+/* Locals:     
+ * int x                      %ebx
+ * int x_scaled             -24(%ebp)
+ */
+
+/*
+ * Setup
+ */
+/* Initialize variables */     
+       movl 32(%ebp),%ebx
+       movl 32(%ebp),%edx
+       sarl $16,%edx
+       movl 12(%ebp),%esi
+
+       movl %edx,-24(%ebp)
+
+       cmpl %esi,28(%ebp)
+       je   .out
+
+/* Load initial values into %mm1, %mm3
+ *
+ * Each source pixel is unpacked to four 16-bit words and multiplied
+ * word-wise by [0xff, a, a, a], where a is the pixel's top byte; the
+ * products are shifted right by 8 further down.  %mm1 is loaded from q1 and
+ * %mm3 from q2, both at index x_scaled.
+ */
+       shll $2, %edx
+
+       pxor %mm4, %mm4
+       
+       movl 16(%ebp),%edi
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+       orl  $0xff000000, %eax
+       movd %eax, %mm1
+       punpcklbw %mm4, %mm1
+       pmullw %mm5,%mm1
+
+       movl -24(%ebp),%edx
+       shll $2, %edx
+               
+       movl 20(%ebp),%edi
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+       orl  $0xff000000, %eax
+       movd %eax, %mm3
+       punpcklbw %mm4, %mm3
+       pmullw %mm5,%mm3
+
+       psrlw $8,%mm1
+       psrlw $8,%mm3
+
+       addl $65536,%ebx
+       movl %ebx,%edx
+       sarl $16,%edx
+
+       jmp .newx
+       .p2align 4,,7
+.loop:
+/* int x_index = (x & 0xf000) >> 12
+ *
+ * The shift below is only 7, which leaves x_index * 32: the byte offset of
+ * the four quadword weights (at +0, +8, +16, +24) that are multiplied with
+ * %mm0, %mm1, %mm2 and %mm3 respectively.
+ */
+       movl %ebx,%eax
+       andl $0xf000,%eax
+       shrl $7,%eax
+
+       movq (%edi,%eax),%mm4
+       pmullw %mm0,%mm4
+       movq 8(%edi,%eax),%mm5
+       pmullw %mm1,%mm5
+       movq 16(%edi,%eax),%mm6
+       movq 24(%edi,%eax),%mm7
+       pmullw %mm2,%mm6
+       pmullw %mm3,%mm7
+       paddw %mm4, %mm5
+       paddw %mm6, %mm7
+       paddw %mm5, %mm7
+
+       movl $0xffff,%ecx
+       movd %ecx,%mm4
+       psllq $48,%mm4
+       movq %mm4,%mm6
+       psubw %mm7,%mm4
+       pand %mm6,%mm4
+       
+       movq %mm4,%mm5
+       psrlq $16,%mm4
+       por %mm4,%mm5
+       psrlq $32,%mm5
+       por %mm4,%mm5
+       
+       psrlw $8,%mm5
+
+       movd (%esi),%mm7
+       pxor %mm4,%mm4
+       punpcklbw %mm4, %mm7
+               
+       pmullw %mm7,%mm5
+
+/* x += x_step; */
+       addl 24(%ebp),%ebx
+/* x_scale = x >> 16;
+ *
+ * NOTE(review): the paddw just below accumulates into %mm6, which at this
+ * point still holds only the 0xffff << 48 mask, while the weighted source
+ * sum that was in %mm7 has been overwritten by the destination pixel loaded
+ * from (%esi).  Traced as written, the stored pixel therefore contains no
+ * contribution from the source colour sum -- confirm against the C
+ * implementation before relying on this routine.
+ */
+       movl %ebx,%edx
+       sarl $16,%edx
+
+       paddw %mm5,%mm6
+
+       psrlw $8,%mm6
+       packuswb %mm6, %mm6 
+       movd %mm6,(%esi)
+
+       addl $4, %esi
+               
+       cmpl %esi,28(%ebp)
+       je   .out
+
+       cmpl %edx,-24(%ebp)
+       je   .loop
+
+.newx:
+       movl %edx,-24(%ebp)
+/*
+ * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
+ *
+ * Reached whenever the integer part of x changes, and once from the setup
+ * code, so that %mm0/%mm2 hold the previously loaded pixels of q1/q2 and
+ * %mm1/%mm3 the pixels at the new index.
+ */
+       movq %mm1, %mm0
+       movq %mm3, %mm2
+
+       shll $2, %edx
+
+#      %mm4 will always be already clear here  
+#      pxor %mm4, %mm4
+
+       movl 16(%ebp),%edi
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+/* 
+ *     mull destroyed %edx, need to reconstitute 
+ */
+       movl -24(%ebp),%edx
+       shll $2, %edx
+
+       orl  $0xff000000, %eax
+       movd %eax, %mm1
+       punpcklbw %mm4, %mm1
+       pmullw %mm5,%mm1
+               
+       movl 20(%ebp),%edi
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+       orl  $0xff000000, %eax
+       movd %eax, %mm3
+       punpcklbw %mm4, %mm3
+       pmullw %mm5,%mm3
+       
+       psrlw $8,%mm1
+       psrlw $8,%mm3
+
+       movl 8(%ebp),%edi
+       
+       jmp .loop
+
+.out:
+       movl %esi,%eax
+       emms
+       leal -40(%ebp),%esp
+       popl %ebx
+       popl %esi
+       popl %edi
+       movl %ebp,%esp
+       popl %ebp
+       ret
diff --git a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S
new file mode 100644 (file)
index 0000000..e3fd640
--- /dev/null
@@ -0,0 +1,219 @@
+       .file   "composite_line_color_22_4a4_mmx.S"
+       .version        "01.01"
+gcc2_compiled.:
+.text
+       .align 16
+.globl pixops_composite_line_color_22_4a4_mmx
+       .type    pixops_composite_line_color_22_4a4_mmx,@function
+/*
+ * pixops_composite_line_color_22_4a4_mmx
+ *
+ * MMX inner loop.  For every output pixel it weights two adjacent
+ * 4-byte pixels from each of two source lines (q1, q2), then adds one
+ * of two colors scaled by the inverse of the accumulated alpha word.
+ * The color is chosen from (dest_x >> check_shift) & 1.  One 4-byte
+ * pixel is stored at p per iteration until p equals p_end; the final
+ * value of p is returned in %eax.
+ *
+ * Arguments
+ *
+ * weights:     8(%ebp)        16 entries of 32 bytes, selected by bits 12-15 of x
+ * p:          12(%ebp)        %esi    output pointer
+ * q1:         16(%ebp)        first source line
+ * q2:         20(%ebp)        second source line
+ * xstep:       24(%ebp)       added to x after every output pixel
+ * p_end:       28(%ebp)       the loop stops when p equals this
+ * xinit:       32(%ebp)       initial x; the source pixel index is x >> 16
+ * dest_x:     36(%ebp)        incremented in place once per output pixel
+ * check_shift:        40(%ebp)
+ * colors:     44(%ebp)        two 8-byte entries
+ *
+*/
+pixops_composite_line_color_22_4a4_mmx:
+/*
+ * Function call entry
+ */
+       pushl %ebp
+       movl %esp,%ebp
+       subl $28,%esp
+       pushl %edi
+       pushl %esi
+       pushl %ebx
+/* Locals:     
+ * int x                      %ebx
+ * int x_scaled             -24(%ebp)
+ */
+
+/*
+ * Setup
+ */
+/* Initialize variables */     
+       movl 32(%ebp),%ebx             /* x = xinit */
+       movl 32(%ebp),%edx
+       sarl $16,%edx                  /* x_scaled = xinit >> 16 */
+       movl 12(%ebp),%esi             /* %esi = p */
+
+       movl %edx,-24(%ebp)
+
+       cmpl %esi,28(%ebp)             /* nothing to do when p == p_end */
+       je   .out
+
+/* Load initial values into %mm1, %mm3 */
+       shll $2, %edx                  /* byte offset of pixel x_scaled */
+
+       pxor %mm4, %mm4                /* zero, used to unpack bytes into words */
+
+       movl 16(%ebp),%edi             /* q1 */
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5           /* the pixel's four bytes as four words */
+       shrl $24, %eax                 /* byte 3 of the pixel */
+       movl $0x010101, %ecx
+       mull %ecx                      /* replicate it into bytes 0-2 */
+       orl  $0xff000000, %eax         /* multiplier for byte 3 itself is 0xff */
+       movd %eax, %mm1
+       punpcklbw %mm4, %mm1
+       pmullw %mm5,%mm1               /* bytes 0-2 times byte 3; byte 3 times 0xff */
+
+/* 
+ *     mull destroyed %edx, need to reconstitute 
+ */
+       movl -24(%ebp),%edx
+       shll $2, %edx
+               
+       movl 20(%ebp),%edi             /* q2: same computation, result in %mm3 */
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+       orl  $0xff000000, %eax
+       movd %eax, %mm3
+       punpcklbw %mm4, %mm3
+       pmullw %mm5,%mm3
+
+       psrlw $8,%mm1                  /* keep the high byte of each product */
+       psrlw $8,%mm3
+
+       addl $65536,%ebx               /* advance x by one whole source pixel */
+       movl %ebx,%edx
+       sarl $16,%edx
+
+       jmp .newx                      /* shifts %mm1/%mm3 into %mm0/%mm2 and loads the next pixels */
+       .p2align 4,,7
+.loop:
+/* byte offset of weights[x_index], x_index = (x & 0xf000) >> 12, 32 bytes per entry */
+       movl %ebx,%eax
+       andl $0xf000,%eax
+       shrl $7,%eax
+
+       movq (%edi,%eax),%mm4          /* %edi holds weights here (set at the end of .newx) */
+       pmullw %mm0,%mm4               /* older q1 pixel */
+       movq 8(%edi,%eax),%mm5
+       pmullw %mm1,%mm5               /* newer q1 pixel */
+       movq 16(%edi,%eax),%mm6
+       movq 24(%edi,%eax),%mm7
+       pmullw %mm2,%mm6               /* older q2 pixel */
+       pmullw %mm3,%mm7               /* newer q2 pixel */
+       paddw %mm4, %mm5
+       paddw %mm6, %mm7
+       paddw %mm5, %mm7               /* %mm7 = sum of the four weighted pixels */
+
+       movl $0xffff,%ecx
+       movd %ecx,%mm4
+       psllq $48,%mm4                 /* 0xffff in word 3 only */
+       movq %mm4,%mm6
+       psubw %mm7,%mm4
+       pand %mm6,%mm4                 /* word 3 = 0xffff - word 3 of %mm7, other words cleared */
+       
+       movq %mm4,%mm5                 /* copy that value into words 0-2 of %mm5 */
+       psrlq $16,%mm4
+       por %mm4,%mm5
+       psrlq $32,%mm5
+       por %mm4,%mm5
+       
+       psrlw $8,%mm5
+
+       movl 36(%ebp),%eax             /* dest_x for this pixel */
+       incl 36(%ebp)                  /* post-increment the stored dest_x */
+
+       movl 40(%ebp),%ecx
+       shrl %cl,%eax
+       andl $1,%eax                   /* (dest_x >> check_shift) & 1 */
+
+       movl 44(%ebp),%ecx
+       movq (%ecx,%eax,8),%mm6        /* 8-byte color entry 0 or 1 */
+
+       pmullw %mm6,%mm5               /* color scaled by the inverted word */
+
+/* x += x_step; */
+       addl 24(%ebp),%ebx
+/* x_scale = x >> 16; */
+       movl %ebx,%edx
+       sarl $16,%edx
+
+       paddw %mm5,%mm7
+
+       psrlw $8,%mm7
+       packuswb %mm7, %mm7            /* words back to bytes with unsigned saturation */
+       movd %mm7,(%esi)               /* store one 4-byte output pixel */
+
+       addl $4, %esi
+               
+       cmpl %esi,28(%ebp)             /* done when p == p_end */
+       je   .out
+
+       cmpl %edx,-24(%ebp)            /* same source pixel as before: reuse %mm0-%mm3 */
+       je   .loop
+
+.newx:
+       movl %edx,-24(%ebp)
+/*
+ * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
+ */
+       movq %mm1, %mm0
+       movq %mm3, %mm2
+
+       shll $2, %edx
+
+       pxor %mm4, %mm4
+
+       movl 16(%ebp),%edi             /* q1 */
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+/* 
+ *     mull destroyed %edx, need to reconstitute 
+ */
+       movl -24(%ebp),%edx
+       shll $2, %edx
+
+       orl  $0xff000000, %eax
+       movd %eax, %mm1
+       punpcklbw %mm4, %mm1
+       pmullw %mm5,%mm1
+               
+       movl 20(%ebp),%edi             /* q2 */
+       movl (%edi, %edx), %eax
+       movd (%edi, %edx), %mm5
+       punpcklbw %mm4, %mm5
+       shrl $24, %eax
+       movl $0x010101, %ecx
+       mull %ecx
+       orl  $0xff000000, %eax
+       movd %eax, %mm3
+       punpcklbw %mm4, %mm3
+       pmullw %mm5,%mm3
+       
+       psrlw $8,%mm1
+       psrlw $8,%mm3
+
+       movl 8(%ebp),%edi              /* %edi = weights for the loop body */
+       
+       jmp .loop
+
+.out:
+       movl %esi,%eax                 /* return the advanced output pointer */
+       emms                           /* leave MMX state before returning */
+       leal -40(%ebp),%esp            /* 28 bytes of locals + 3 saved registers */
+       popl %ebx
+       popl %esi
+       popl %edi
+       movl %ebp,%esp
+       popl %ebp
+       ret
diff --git a/gdk-pixbuf/pixops/have_mmx.S b/gdk-pixbuf/pixops/have_mmx.S
new file mode 100644 (file)
index 0000000..da22250
--- /dev/null
@@ -0,0 +1,42 @@
+       .file   "have_mmx.S"
+       .version        "01.01"
+gcc2_compiled.:
+.text
+       .align 16
+.globl pixops_have_mmx
+       .type    pixops_have_mmx,@function
+       /* int pixops_have_mmx (void): returns 1 in %eax when CPUID is usable and reports MMX, else 0 */
+pixops_have_mmx:
+       push    %ebx                    /* %ebx is used as scratch below and restored at .out */
+
+# Check if bit 21 in flags word is writeable
+
+       pushfl  
+       popl    %eax                    /* %eax = current flags */
+       movl    %eax,%ebx               /* keep the original value for the comparison */
+       xorl    $0x00200000, %eax       /* toggle bit 21 */
+       pushl   %eax
+       popfl                           /* try to write the toggled value */
+       pushfl
+       popl    %eax                    /* read the flags back */
+
+       cmpl    %eax, %ebx
+
+       je .notfound                    /* bit did not change */
+
+# OK, we have CPUID
+
+       movl    $1, %eax                /* CPUID function 1 */
+       cpuid
+       
+       test    $0x00800000, %edx       /* bit 23 of %edx */
+       jz      .notfound
+
+       movl    $1, %eax
+       jmp     .out
+
+.notfound:
+       movl    $0, %eax
+.out:  
+       popl    %ebx
+       ret
\ No newline at end of file
diff --git a/gdk-pixbuf/pixops/pixops-internal.h b/gdk-pixbuf/pixops/pixops-internal.h
new file mode 100644 (file)
index 0000000..b6ed3c0
--- /dev/null
@@ -0,0 +1,7 @@
+#ifdef USE_MMX /* prototypes for the routines implemented in the pixops .S files */
+art_u8 *pixops_scale_line_22_33_mmx (art_u32 weights[16][8], art_u8 *p, art_u8 *q1, art_u8 *q2, int x_step, art_u8 *p_stop, int x_init);
+art_u8 *pixops_composite_line_22_4a4_mmx (art_u32 weights[16][8], art_u8 *p, art_u8 *q1, art_u8 *q2, int x_step, art_u8 *p_stop, int x_init);
+/* Stores 4-byte pixels at p until p == p_stop and returns the advanced p; colors is read as two 8-byte entries */
+art_u8 *pixops_composite_line_color_22_4a4_mmx (art_u32 weights[16][8], art_u8 *p, art_u8 *q1, art_u8 *q2, int x_step, art_u8 *p_stop, int x_init, int dest_x, int check_shift, int *colors);
+int pixops_have_mmx (void); /* 1 when CPUID is available and reports MMX, 0 otherwise */
+#endif /* USE_MMX */
+
diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c
new file mode 100644 (file)
index 0000000..ac90106
--- /dev/null
@@ -0,0 +1,1519 @@
+#include <math.h>
+#include <glib.h>
+#include "config.h"
+
+#include "pixops.h"
+#include "pixops-internal.h"
+
+#define SUBSAMPLE_BITS 4 /* fractional bits of x used to pick a weight table */
+#define SUBSAMPLE (1 << SUBSAMPLE_BITS)
+#define SUBSAMPLE_MASK ((1 << SUBSAMPLE_BITS)-1)
+#define SCALE_SHIFT 16 /* positions are fixed point; x >> SCALE_SHIFT is the source pixel index */
+
+typedef struct _PixopsFilter PixopsFilter;
+
+struct _PixopsFilter
+{
+  int *weights; /* presumably n_x * n_y weights per sub-sample phase, as the line functions index them - TODO confirm */
+  int n_x;
+  int n_y;
+  double x_offset;
+  double y_offset;
+}; 
+/* Processes one run of output pixels from dest up to dest_end; the implementations in this file return the advanced dest */
+typedef art_u8 *(*PixopsLineFunc) (int *weights, int n_x, int n_y,
+                                  art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                                  art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                                  int x_init, int x_step, int src_width,
+                                  int check_size, art_u32 color1, art_u32 color2);
+/* Writes one destination pixel from already accumulated r, g, b, a sums */
+typedef void (*PixopsPixelFunc) (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha,
+                                int src_has_alpha, int check_size, art_u32 color1,
+                                art_u32 color2,
+                                int r, int g, int b, int a);
+
+/*
+ * get_check_shift:
+ * @check_size: checkerboard tile size, must be greater than zero
+ *
+ * Counts how many times check_size can be halved before its lowest bit
+ * is set, i.e. the index of the lowest set bit.  Callers use the result
+ * as a shift count when deciding which of two check colors applies.
+ *
+ * Returns 4 (after the glib warning) when check_size is not positive.
+ * Zero has to be rejected here: it has no set bit, so the loop below
+ * would never terminate.
+ */
+static int
+get_check_shift (int check_size)
+{
+  int check_shift = 0;
+
+  g_return_val_if_fail (check_size > 0, 4);
+
+  while (!(check_size & 1))
+    {
+      check_shift++;
+      check_size >>= 1;
+    }
+
+  return check_shift;
+}
+
+static void
+pixops_scale_nearest (art_u8        *dest_buf,
+                     int            render_x0,
+                     int            render_y0,
+                     int            render_x1,
+                     int            render_y1,
+                     int            dest_rowstride,
+                     int            dest_channels,
+                     art_boolean    dest_has_alpha,
+                     art_u8        *src_buf,
+                     int            src_width,
+                     int            src_height,
+                     int            src_rowstride,
+                     int            src_channels,
+                     art_boolean    src_has_alpha,
+                     double         scale_x,
+                     double         scale_y)
+{
+  int i, j;
+  int x;
+  int x_step = (1 << SCALE_SHIFT) / scale_x;
+  int y_step = (1 << SCALE_SHIFT) / scale_y;
+
+#define INNER_LOOP(SRC_CHANNELS,DEST_CHANNELS)                         \
+      for (j=0; j < (render_x1 - render_x0); j++)              \
+       {                                                       \
+         art_u8 *p = src + (x >> SCALE_SHIFT) * SRC_CHANNELS;  \
+                                                               \
+         dest[0] = p[0];                                       \
+         dest[1] = p[1];                                       \
+         dest[2] = p[2];                                       \
+                                                               \
+         if (DEST_CHANNELS == 4)                               \
+           {                                                   \
+             if (SRC_CHANNELS == 4)                            \
+               *(dest++) = p[3];                               \
+             else                                              \
+               *(dest++) = 0xff;                               \
+           }                                                   \
+                                                               \
+         dest += DEST_CHANNELS;                                \
+         x += x_step;                                          \
+       }
+
+  for (i = 0; i < (render_y1 - render_y0); i++)
+    {
+      art_u8 *src = src_buf + ((i * y_step + y_step / 2) >> SCALE_SHIFT) * src_rowstride;
+      art_u8 *dest = dest_buf + i * dest_rowstride;
+
+      x = render_x0 * x_step + x_step / 2;
+
+      if (src_channels == 3)
+       {
+         if (dest_channels == 3)
+           {
+             INNER_LOOP (3, 3);
+           }
+         else
+           {
+             INNER_LOOP (3, 4);
+           }
+       }
+      else if (src_channels == 4)
+       {
+         if (dest_channels == 3)
+           {
+             INNER_LOOP (4, 3);
+           }
+         else
+           {
+             for (j=0; j < (render_x1 - render_x0); j++)
+               {
+                 art_u8 *p = src + (x >> SCALE_SHIFT) * 4;
+
+                 *(art_u32 *)dest = *(art_u32 *)p;
+                 
+                 dest += 4;
+                 x += x_step;
+               }
+           }
+       }
+    }
+#undef INNER_LOOP  
+}
+
+/*
+ * pixops_composite_nearest:
+ *
+ * Nearest-neighbour scaling combined with alpha compositing of the
+ * source over the existing destination contents.  The source alpha of
+ * each sampled pixel is p[3] scaled by overall_alpha (or overall_alpha
+ * alone when the source has no alpha channel).
+ *
+ * With dest_has_alpha the colors are mixed in proportion to the source
+ * and destination alphas and the destination alpha is updated; without
+ * it the destination color is moved towards the source color by the
+ * source alpha and, for a 4-channel destination, byte 3 is set to 0xff.
+ *
+ * src_width and src_height are accepted but not consulted.
+ */
+static void
+pixops_composite_nearest (art_u8        *dest_buf,
+                          int            render_x0,
+                          int            render_y0,
+                          int            render_x1,
+                          int            render_y1,
+                          int            dest_rowstride,
+                          int            dest_channels,
+                          art_boolean    dest_has_alpha,
+                          art_u8        *src_buf,
+                          int            src_width,
+                          int            src_height,
+                          int            src_rowstride,
+                          int            src_channels,
+                          art_boolean    src_has_alpha,
+                          double         scale_x,
+                          double         scale_y,
+                          int            overall_alpha)
+{
+  int i, j;
+  int x;
+  int x_step = (1 << SCALE_SHIFT) / scale_x;
+  int y_step = (1 << SCALE_SHIFT) / scale_y;
+
+  for (i = 0; i < (render_y1 - render_y0); i++)
+    {
+      art_u8 *src = src_buf + (((i + render_y0) * y_step + y_step / 2) >> SCALE_SHIFT) * src_rowstride;
+      /* NOTE(review): the other nearest-neighbour routines in this file
+       * start each row at dest_buf + i * dest_rowstride without the
+       * render_x0 offset; confirm against the caller which one is meant.
+       */
+      art_u8 *dest = dest_buf + i * dest_rowstride + render_x0 * dest_channels;
+
+      x = render_x0 * x_step + x_step / 2;
+
+      for (j = 0; j < (render_x1 - render_x0); j++)
+        {
+          art_u8 *p = src + (x >> SCALE_SHIFT) * src_channels;
+          unsigned int a0;
+
+          if (src_has_alpha)
+            a0 = (p[3] * overall_alpha + 0xff) >> 8;
+          else
+            a0 = overall_alpha;
+
+          if (dest_has_alpha)
+            {
+              unsigned int a1 = dest[3];
+              unsigned int total = a0 + a1;
+
+              if (total)
+                {
+                  /* Blend the sampled pixel p, not the start of the row */
+                  dest[0] = (a0 * p[0] + a1 * dest[0]) / (total);
+                  dest[1] = (a0 * p[1] + a1 * dest[1]) / (total);
+                  dest[2] = (a0 * p[2] + a1 * dest[2]) / (total);
+                  dest[3] = total - ((a0 * a1 + 0xff) >> 8);
+                }
+              else
+                {
+                  dest[0] = 0;
+                  dest[1] = 0;
+                  dest[2] = 0;
+                  dest[3] = 0;
+                }
+            }
+          else
+            {
+              dest[0] = dest[0] + ((a0 * (p[0] - dest[0]) + 0xff) >> 8);
+              dest[1] = dest[1] + ((a0 * (p[1] - dest[1]) + 0xff) >> 8);
+              dest[2] = dest[2] + ((a0 * (p[2] - dest[2]) + 0xff) >> 8);
+
+              /* Write byte 3 in place; advancing dest here would clobber
+               * byte 0 and skew every following pixel.
+               */
+              if (dest_channels == 4)
+                dest[3] = 0xff;
+            }
+
+          dest += dest_channels;
+          x += x_step;
+        }
+    }
+}
+
+/*
+ * pixops_composite_color_nearest:
+ *
+ * Nearest-neighbour scaling of the source composited over a two-color
+ * checkerboard.  color1 and color2 are read as 0xRRGGBB.  Which color
+ * lies under a pixel is decided by bit check_shift of (i + check_y) and
+ * of (j + check_x), where check_shift = get_check_shift (check_size).
+ * The previous destination contents are not read.  For a 4-channel
+ * destination byte 3 is set to 0xff.
+ *
+ * check_size must not be zero.  dest_has_alpha, src_width and
+ * src_height are accepted but not consulted.
+ */
+static void
+pixops_composite_color_nearest (art_u8        *dest_buf,
+                                int            render_x0,
+                                int            render_y0,
+                                int            render_x1,
+                                int            render_y1,
+                                int            dest_rowstride,
+                                int            dest_channels,
+                                art_boolean    dest_has_alpha,
+                                art_u8        *src_buf,
+                                int            src_width,
+                                int            src_height,
+                                int            src_rowstride,
+                                int            src_channels,
+                                art_boolean    src_has_alpha,
+                                double         scale_x,
+                                double         scale_y,
+                                int            overall_alpha,
+                                int            check_x,
+                                int            check_y,
+                                int            check_size,
+                                art_u32        color1,
+                                art_u32        color2)
+{
+  int i, j;
+  int x;
+  int x_step = (1 << SCALE_SHIFT) / scale_x;
+  int y_step = (1 << SCALE_SHIFT) / scale_y;
+  int r1, g1, b1, r2, g2, b2;
+  int check_shift;
+
+  /* A zero tile size has no lowest set bit to shift by */
+  g_return_if_fail (check_size != 0);
+
+  check_shift = get_check_shift (check_size);
+
+  for (i = 0; i < (render_y1 - render_y0); i++)
+    {
+      art_u8 *src = src_buf + (((i + render_y0) * y_step + y_step/2) >> SCALE_SHIFT) * src_rowstride;
+      art_u8 *dest = dest_buf + i * dest_rowstride;
+      art_u32 first, second;
+
+      x = render_x0 * x_step + x_step / 2;
+
+      /* Odd tile rows start with color2, even ones with color1 */
+      if (((i + check_y) >> check_shift) & 1)
+        {
+          first = color2;
+          second = color1;
+        }
+      else
+        {
+          first = color1;
+          second = color2;
+        }
+
+      /* Mask first, then shift: >> binds tighter than & */
+      r1 = (first & 0xff0000) >> 16;
+      g1 = (first & 0xff00) >> 8;
+      b1 = first & 0xff;
+
+      r2 = (second & 0xff0000) >> 16;
+      g2 = (second & 0xff00) >> 8;
+      b2 = second & 0xff;
+
+      for (j = 0; j < (render_x1 - render_x0); j++)
+        {
+          art_u8 *p = src + (x >> SCALE_SHIFT) * src_channels;
+          unsigned int a0;
+
+          if (src_has_alpha)
+            a0 = (p[3] * overall_alpha + 0xff) >> 8;
+          else
+            a0 = overall_alpha;
+
+          if (((j + check_x) >> check_shift) & 1)
+            {
+              dest[0] = r2 + ((a0 * ((int)p[0] - r2) + 0xff) >> 8);
+              dest[1] = g2 + ((a0 * ((int)p[1] - g2) + 0xff) >> 8);
+              dest[2] = b2 + ((a0 * ((int)p[2] - b2) + 0xff) >> 8);
+            }
+          else
+            {
+              dest[0] = r1 + ((a0 * ((int)p[0] - r1) + 0xff) >> 8);
+              dest[1] = g1 + ((a0 * ((int)p[1] - g1) + 0xff) >> 8);
+              dest[2] = b1 + ((a0 * ((int)p[2] - b1) + 0xff) >> 8);
+            }
+
+          /* Written in place so the stride below stays dest_channels */
+          if (dest_channels == 4)
+            dest[3] = 0xff;
+
+          dest += dest_channels;
+          x += x_step;
+        }
+    }
+}
+
+/*
+ * composite_pixel:
+ *
+ * Composites one accumulated source sample over the pixel at dest.
+ * a is the weighted source alpha on the scale used by the opaque
+ * branch below, where 0xff0000 means full coverage; r, g and b are the
+ * color sums weighted by that same alpha.
+ *
+ * The sums can exceed INT_MAX, so the int parameters are reinterpreted
+ * as unsigned before any arithmetic.
+ *
+ * With dest_has_alpha the source weight w0 and the destination weight
+ * w1 are both brought to the 0..0xfe0100 scale (255 * 255 * 256):
+ *   w0 = a * 255 / 256
+ *   w1 = (0xff0000 - a) / 256 * dest[3]
+ * The new color is the weighted mean and the new alpha is
+ * (w0 + w1) / 0xff00, which lies in 0..255.
+ *
+ * dest_x, dest_channels, src_has_alpha, check_size, color1 and color2
+ * are accepted but not consulted.
+ */
+static void
+composite_pixel (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha,
+                 int src_has_alpha, int check_size, art_u32 color1, art_u32 color2,
+                 int r, int g, int b, int a)
+{
+  unsigned int ur = r;
+  unsigned int ug = g;
+  unsigned int ub = b;
+  unsigned int ua = a;
+
+  if (dest_has_alpha)
+    {
+      unsigned int w0 = ua - (ua >> 8);
+      unsigned int w1 = ((0xff0000 - ua) >> 8) * dest[3];
+      unsigned int w = w0 + w1;
+
+      if (w != 0)
+        {
+          dest[0] = (ur - (ur >> 8) + w1 * dest[0]) / w;
+          dest[1] = (ug - (ug >> 8) + w1 * dest[1]) / w;
+          dest[2] = (ub - (ub >> 8) + w1 * dest[2]) / w;
+          dest[3] = w / 0xff00;
+        }
+      else
+        {
+          dest[0] = 0;
+          dest[1] = 0;
+          dest[2] = 0;
+          dest[3] = 0;
+        }
+    }
+  else
+    {
+      dest[0] = ((0xff0000 - ua) * dest[0] + ur) >> 24;
+      dest[1] = ((0xff0000 - ua) * dest[1] + ug) >> 24;
+      dest[2] = ((0xff0000 - ua) * dest[2] + ub) >> 24;
+    }
+}
+
+/*
+ * composite_line:
+ *
+ * General line function: for every destination pixel in [dest,
+ * dest_end) it accumulates an n_x by n_y neighbourhood of source
+ * pixels, starting at source column x >> SCALE_SHIFT in each of the
+ * n_y rows of src, and composites the result over the destination.
+ * The weight table is chosen by the top SUBSAMPLE_BITS fractional bits
+ * of x.  Each weight is multiplied by the source alpha (0xff when the
+ * source has no alpha channel), so a ends up on the scale where
+ * 0xff0000 is full coverage, as the opaque branch below assumes.
+ *
+ * With dest_has_alpha the source and destination weights are brought
+ * to a common 0..0xfe0100 scale (w0, w1); the new color is their
+ * weighted mean and the new alpha is (w0 + w1) / 0xff00.
+ *
+ * Returns the advanced dest pointer.  dest_x, src_width, check_size,
+ * color1 and color2 are accepted but not consulted.
+ */
+static art_u8 *
+composite_line (int *weights, int n_x, int n_y,
+                art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                int x_init, int x_step, int src_width,
+                int check_size, art_u32 color1, art_u32 color2)
+{
+  int x = x_init;
+  int i, j;
+
+  while (dest < dest_end)
+    {
+      int x_scaled = x >> SCALE_SHIFT;
+      unsigned int r = 0, g = 0, b = 0, a = 0;
+      int *pixel_weights;
+
+      pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y;
+
+      for (i = 0; i < n_y; i++)
+        {
+          art_u8 *q = src[i] + x_scaled * src_channels;
+          int *line_weights = pixel_weights + n_x * i;
+
+          for (j = 0; j < n_x; j++)
+            {
+              unsigned int ta;
+
+              if (src_has_alpha)
+                ta = q[3] * line_weights[j];
+              else
+                ta = 0xff * line_weights[j];
+
+              r += ta * q[0];
+              g += ta * q[1];
+              b += ta * q[2];
+              a += ta;
+
+              q += src_channels;
+            }
+        }
+
+      if (dest_has_alpha)
+        {
+          unsigned int w0 = a - (a >> 8);
+          unsigned int w1 = ((0xff0000 - a) >> 8) * dest[3];
+          unsigned int w = w0 + w1;
+
+          if (w != 0)
+            {
+              /* Each channel uses its own accumulator */
+              dest[0] = (r - (r >> 8) + w1 * dest[0]) / w;
+              dest[1] = (g - (g >> 8) + w1 * dest[1]) / w;
+              dest[2] = (b - (b >> 8) + w1 * dest[2]) / w;
+              dest[3] = w / 0xff00;
+            }
+          else
+            {
+              dest[0] = 0;
+              dest[1] = 0;
+              dest[2] = 0;
+              dest[3] = 0;
+            }
+        }
+      else
+        {
+          dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24;
+          dest[1] = ((0xff0000 - a) * dest[1] + g) >> 24;
+          dest[2] = ((0xff0000 - a) * dest[2] + b) >> 24;
+        }
+
+      dest += dest_channels;
+      x += x_step;
+    }
+
+  return dest;
+}
+
+/*
+ * composite_line_22_4a4:
+ *
+ * Line function specialised for a 2x2 filter with a 4-channel source
+ * that has alpha and a 4-channel destination.  The four weights of the
+ * selected sub-sample phase apply, in order, to:
+ *   w1: row 0, pixel x_scaled        w2: row 0, pixel x_scaled + 1
+ *   w3: row 1, pixel x_scaled        w4: row 1, pixel x_scaled + 1
+ * (the same tap order scale_line_22_33 uses).  Each tap's weight is
+ * multiplied by that pixel's alpha before it is accumulated.
+ *
+ * Returns the advanced dest pointer, or dest unchanged when the source
+ * has three channels or no alpha.
+ */
+static art_u8 *
+composite_line_22_4a4 (int *weights, int n_x, int n_y,
+                       art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                       art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                       int x_init, int x_step, int src_width,
+                       int check_size, art_u32 color1, art_u32 color2)
+{
+  int x = x_init;
+  art_u8 *src0 = src[0];
+  art_u8 *src1 = src[1];
+
+  g_return_val_if_fail (src_channels != 3, dest);
+  g_return_val_if_fail (src_has_alpha, dest);
+
+  while (dest < dest_end)
+    {
+      int x_scaled = x >> SCALE_SHIFT;
+      unsigned int r, g, b, a, ta;
+      int *pixel_weights;
+      art_u8 *q0, *q1;
+      int w1, w2, w3, w4;
+
+      q0 = src0 + x_scaled * 4;
+      q1 = src1 + x_scaled * 4;
+
+      /* Byte offset of the phase's four weights: phase * 16 */
+      pixel_weights = (int *)((char *)weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4)));
+
+      w1 = pixel_weights[0];
+      w2 = pixel_weights[1];
+      w3 = pixel_weights[2];
+      w4 = pixel_weights[3];
+
+      a = w1 * q0[3];
+      r = a * q0[0];
+      g = a * q0[1];
+      b = a * q0[2];
+
+      ta = w2 * q0[7];
+      r += ta * q0[4];
+      g += ta * q0[5];
+      b += ta * q0[6];
+      a += ta;
+
+      /* Second row, first pixel */
+      ta = w3 * q1[3];
+      r += ta * q1[0];
+      g += ta * q1[1];
+      b += ta * q1[2];
+      a += ta;
+
+      /* Second row, second pixel; ta is assigned, not accumulated */
+      ta = w4 * q1[7];
+      r += ta * q1[4];
+      g += ta * q1[5];
+      b += ta * q1[6];
+      a += ta;
+
+      dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24;
+      dest[1] = ((0xff0000 - a) * dest[1] + g) >> 24;
+      dest[2] = ((0xff0000 - a) * dest[2] + b) >> 24;
+      dest[3] = a >> 16;
+
+      dest += 4;
+      x += x_step;
+    }
+
+  return dest;
+}
+
+#ifdef USE_MMX
+/*
+ * composite_line_22_4a4_mmx_stub:
+ *
+ * PixopsLineFunc-shaped wrapper around pixops_composite_line_22_4a4_mmx.
+ * It repacks the first 16 * 4 integer weights into the table the
+ * assembly routine takes: every weight is shifted right by 8, multiplied
+ * by 0x00010001 and stored in two consecutive 32-bit slots.  Only
+ * src[0] and src[1] are passed on.
+ */
+static art_u8 *
+composite_line_22_4a4_mmx_stub (int *weights, int n_x, int n_y,
+                                art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                                art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                                int x_init, int x_step, int src_width,
+                                int check_size, art_u32 color1, art_u32 color2)
+{
+  art_u32 mmx_weights[16][8];
+  int phase, tap;
+
+  for (phase = 0; phase < 16; phase++)
+    for (tap = 0; tap < 4; tap++)
+      {
+        art_u32 packed = 0x00010001 * (weights[4 * phase + tap] >> 8);
+
+        mmx_weights[phase][2 * tap] = packed;
+        mmx_weights[phase][2 * tap + 1] = packed;
+      }
+
+  return pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1], x_step, dest_end, x_init);
+}
+#endif /* USE_MMX */
+
+/*
+ * composite_pixel_color:
+ *
+ * Composites one accumulated source sample over a check color instead
+ * of the existing destination contents.  color1 and color2 are read as
+ * 0xRRGGBB; color2 is used when bit check_shift of dest_x is set,
+ * color1 otherwise.  a is the weighted source alpha on the scale where
+ * 0xff0000 is full coverage; r, g and b are the alpha-weighted color
+ * sums.  The sums can exceed INT_MAX, so they are reinterpreted as
+ * unsigned before the arithmetic.
+ *
+ * dest[3] becomes 0xff when dest_has_alpha, or a >> 16 when the
+ * destination merely has four channels.
+ */
+static void
+composite_pixel_color (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha,
+                       int src_has_alpha, int check_size, art_u32 color1, art_u32 color2,
+                       int r, int g, int b, int a)
+{
+  unsigned int ur = r;
+  unsigned int ug = g;
+  unsigned int ub = b;
+  unsigned int ua = a;
+  unsigned int dest_r, dest_g, dest_b;
+  art_u32 color;
+  int check_shift = get_check_shift (check_size);
+
+  color = ((dest_x >> check_shift) & 1) ? color2 : color1;
+
+  /* Mask first, then shift: >> binds tighter than & */
+  dest_r = (color & 0xff0000) >> 16;
+  dest_g = (color & 0xff00) >> 8;
+  dest_b = color & 0xff;
+
+  dest[0] = ((0xff0000 - ua) * dest_r + ur) >> 24;
+  dest[1] = ((0xff0000 - ua) * dest_g + ug) >> 24;
+  dest[2] = ((0xff0000 - ua) * dest_b + ub) >> 24;
+
+  if (dest_has_alpha)
+    dest[3] = 0xff;
+  else if (dest_channels == 4)
+    dest[3] = ua >> 16;
+}
+
+/*
+ * composite_line_color:
+ *
+ * General line function that composites the filtered source over a
+ * two-color checkerboard rather than over the destination contents.
+ * For every destination pixel in [dest, dest_end) an n_x by n_y
+ * neighbourhood of source pixels is accumulated with alpha-scaled
+ * weights; the result is blended with color2 when bit check_shift of
+ * dest_x is set and with color1 otherwise (both read as 0xRRGGBB).
+ * dest_x advances by one per pixel.
+ *
+ * dest[3] becomes 0xff when dest_has_alpha, or a >> 16 when the
+ * destination merely has four channels.
+ *
+ * Returns the advanced dest pointer, or dest unchanged when check_size
+ * is zero.  src_width is accepted but not consulted.
+ */
+static art_u8 *
+composite_line_color (int *weights, int n_x, int n_y,
+                      art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                      art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                      int x_init, int x_step, int src_width,
+                      int check_size, art_u32 color1, art_u32 color2)
+{
+  int x = x_init;
+  int i, j;
+  int check_shift;
+  int dest_r1, dest_g1, dest_b1;
+  int dest_r2, dest_g2, dest_b2;
+
+  /* Validate before asking for the shift: zero has no lowest set bit */
+  g_return_val_if_fail (check_size != 0, dest);
+
+  check_shift = get_check_shift (check_size);
+
+  /* Mask first, then shift: >> binds tighter than & */
+  dest_r1 = (color1 & 0xff0000) >> 16;
+  dest_g1 = (color1 & 0xff00) >> 8;
+  dest_b1 = color1 & 0xff;
+
+  dest_r2 = (color2 & 0xff0000) >> 16;
+  dest_g2 = (color2 & 0xff00) >> 8;
+  dest_b2 = color2 & 0xff;
+
+  while (dest < dest_end)
+    {
+      int x_scaled = x >> SCALE_SHIFT;
+      unsigned int r = 0, g = 0, b = 0, a = 0;
+      int *pixel_weights;
+
+      pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y;
+
+      for (i = 0; i < n_y; i++)
+        {
+          art_u8 *q = src[i] + x_scaled * src_channels;
+          int *line_weights = pixel_weights + n_x * i;
+
+          for (j = 0; j < n_x; j++)
+            {
+              unsigned int ta;
+
+              if (src_has_alpha)
+                ta = q[3] * line_weights[j];
+              else
+                ta = 0xff * line_weights[j];
+
+              r += ta * q[0];
+              g += ta * q[1];
+              b += ta * q[2];
+              a += ta;
+
+              q += src_channels;
+            }
+        }
+
+      if ((dest_x >> check_shift) & 1)
+        {
+          dest[0] = ((0xff0000 - a) * dest_r2 + r) >> 24;
+          dest[1] = ((0xff0000 - a) * dest_g2 + g) >> 24;
+          dest[2] = ((0xff0000 - a) * dest_b2 + b) >> 24;
+        }
+      else
+        {
+          dest[0] = ((0xff0000 - a) * dest_r1 + r) >> 24;
+          dest[1] = ((0xff0000 - a) * dest_g1 + g) >> 24;
+          dest[2] = ((0xff0000 - a) * dest_b1 + b) >> 24;
+        }
+
+      if (dest_has_alpha)
+        dest[3] = 0xff;
+      else if (dest_channels == 4)
+        dest[3] = a >> 16;
+
+      dest += dest_channels;
+      x += x_step;
+      dest_x++;
+    }
+
+  return dest;
+}
+
+#ifdef USE_MMX
+/*
+ * composite_line_color_22_4a4_mmx_stub:
+ *
+ * PixopsLineFunc-shaped wrapper around
+ * pixops_composite_line_color_22_4a4_mmx.  It repacks the first 16 * 4
+ * integer weights (each shifted right by 8, multiplied by 0x00010001
+ * and stored twice) and spreads each check color over two ints: the
+ * first holds bits 8-15 of the color moved up to bits 16-23 together
+ * with bits 0-7, the second holds bits 16-23 moved down to bits 0-7.
+ *
+ * NOTE(review): this puts the low color byte in the lowest 16-bit lane,
+ * while the C line functions pair dest[0] with bits 16-23 of the color;
+ * confirm that the assembly expects this order.
+ */
+static art_u8 *
+composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y,
+                                      art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                                      art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                                      int x_init, int x_step, int src_width,
+                                      int check_size, art_u32 color1, art_u32 color2)
+{
+  art_u32 mmx_weights[16][8];
+  int colors[4];
+  int shift = get_check_shift (check_size);
+  int phase, tap;
+
+  for (phase = 0; phase < 16; phase++)
+    for (tap = 0; tap < 4; tap++)
+      {
+        art_u32 packed = 0x00010001 * (weights[4 * phase + tap] >> 8);
+
+        mmx_weights[phase][2 * tap] = packed;
+        mmx_weights[phase][2 * tap + 1] = packed;
+      }
+
+  colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff);
+  colors[1] = (color1 & 0xff0000) >> 16;
+  colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff);
+  colors[3] = (color2 & 0xff0000) >> 16;
+
+  return pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0], src[1], x_step, dest_end, x_init,
+                                                 dest_x, shift, colors);
+}
+#endif /* USE_MMX */
+
+/*
+ * scale_pixel:
+ *
+ * Writes one destination pixel from accumulated sums, with no
+ * compositing.  When the source has alpha, r, g and b are sums weighted
+ * by alpha and a is the alpha sum: each color is the quotient of its
+ * sum by a, and dest[3] is a >> 16.  Otherwise each color is its sum
+ * shifted right by 16, and dest[3] is set to 0xff if dest_has_alpha.
+ *
+ * The alpha-weighted sums can exceed INT_MAX, in which case they reach
+ * this function as negative ints; they are reinterpreted as unsigned so
+ * that the divisions and shifts act on the intended magnitudes.
+ *
+ * dest_x, dest_channels, check_size, color1 and color2 are accepted but
+ * not consulted.
+ */
+static void
+scale_pixel (art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha,
+             int src_has_alpha, int check_size, art_u32 color1, art_u32 color2,
+             int r, int g, int b, int a)
+{
+  unsigned int ur = r;
+  unsigned int ug = g;
+  unsigned int ub = b;
+  unsigned int ua = a;
+
+  if (src_has_alpha)
+    {
+      if (ua)
+        {
+          dest[0] = ur / ua;
+          dest[1] = ug / ua;
+          dest[2] = ub / ua;
+          dest[3] = ua >> 16;
+        }
+      else
+        {
+          dest[0] = 0;
+          dest[1] = 0;
+          dest[2] = 0;
+          dest[3] = 0;
+        }
+    }
+  else
+    {
+      dest[0] = ur >> 16;
+      dest[1] = ug >> 16;
+      dest[2] = ub >> 16;
+
+      if (dest_has_alpha)
+        dest[3] = 0xff;
+    }
+}
+
+/*
+ * scale_line:
+ *
+ * General line function for plain scaling.  For every destination
+ * pixel in [dest, dest_end) it accumulates an n_x by n_y neighbourhood
+ * of source pixels starting at source column x >> SCALE_SHIFT, using
+ * the weight table chosen by the top SUBSAMPLE_BITS fractional bits of
+ * x.
+ *
+ * With src_has_alpha every weight is multiplied by the pixel's alpha;
+ * the colors are the quotients of the weighted sums by the alpha sum
+ * and dest[3] is the alpha sum >> 16 (all zero when the alpha sum is
+ * zero).  Without it the colors are the weighted sums >> 16, and
+ * dest[3] is set to 0xff when dest_has_alpha.
+ *
+ * Returns the advanced dest pointer.  dest_x, src_width, check_size,
+ * color1 and color2 are accepted but not consulted.
+ */
+static art_u8 *
+scale_line (int *weights, int n_x, int n_y,
+            art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+            art_u8 **src, int src_channels, art_boolean src_has_alpha,
+            int x_init, int x_step, int src_width,
+            int check_size, art_u32 color1, art_u32 color2)
+{
+  int x = x_init;
+  int i, j;
+
+  while (dest < dest_end)
+    {
+      int x_scaled = x >> SCALE_SHIFT;
+      int *pixel_weights;
+
+      pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y;
+
+      if (src_has_alpha)
+        {
+          unsigned int r = 0, g = 0, b = 0, a = 0;
+
+          for (i = 0; i < n_y; i++)
+            {
+              /* Source pixels are src_channels bytes apart */
+              art_u8 *q = src[i] + x_scaled * src_channels;
+              int *line_weights = pixel_weights + n_x * i;
+
+              for (j = 0; j < n_x; j++)
+                {
+                  unsigned int ta;
+
+                  ta = q[3] * line_weights[j];
+                  /* Accumulate every tap, not just the last one */
+                  r += ta * q[0];
+                  g += ta * q[1];
+                  b += ta * q[2];
+                  a += ta;
+
+                  q += src_channels;
+                }
+            }
+
+          if (a)
+            {
+              dest[0] = r / a;
+              dest[1] = g / a;
+              dest[2] = b / a;
+              dest[3] = a >> 16;
+            }
+          else
+            {
+              dest[0] = 0;
+              dest[1] = 0;
+              dest[2] = 0;
+              dest[3] = 0;
+            }
+        }
+      else
+        {
+          unsigned int r = 0, g = 0, b = 0;
+
+          for (i = 0; i < n_y; i++)
+            {
+              art_u8 *q = src[i] + x_scaled * src_channels;
+              int *line_weights = pixel_weights + n_x * i;
+
+              for (j = 0; j < n_x; j++)
+                {
+                  unsigned int ta = line_weights[j];
+
+                  r += ta * q[0];
+                  g += ta * q[1];
+                  b += ta * q[2];
+
+                  q += src_channels;
+                }
+            }
+
+          dest[0] = r >> 16;
+          dest[1] = g >> 16;
+          dest[2] = b >> 16;
+
+          if (dest_has_alpha)
+            dest[3] = 0xff;
+        }
+
+      dest += dest_channels;
+
+      x += x_step;
+    }
+
+  return dest;
+}
+
+#ifdef USE_MMX 
+/*
+ * scale_line_22_33_mmx_stub:
+ *
+ * PixopsLineFunc-shaped wrapper around pixops_scale_line_22_33_mmx.
+ * It repacks the first 16 * 4 integer weights into the table the
+ * assembly routine takes: every weight is shifted right by 8, multiplied
+ * by 0x00010001 and stored in two consecutive 32-bit slots.  Only
+ * src[0] and src[1] are passed on.
+ */
+static art_u8 *
+scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y,
+                           art_u8 *dest, int dest_x, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                           art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                           int x_init, int x_step, int src_width,
+                           int check_size, art_u32 color1, art_u32 color2)
+{
+  art_u32 mmx_weights[16][8];
+  int phase, tap;
+
+  for (phase = 0; phase < 16; phase++)
+    for (tap = 0; tap < 4; tap++)
+      {
+        art_u32 packed = 0x00010001 * (weights[4 * phase + tap] >> 8);
+
+        mmx_weights[phase][2 * tap] = packed;
+        mmx_weights[phase][2 * tap + 1] = packed;
+      }
+
+  return pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1], x_step, dest_end, x_init);
+}
+#endif /* USE_MMX */
+
+/*
+ * scale_line_22_33:
+ *
+ * Line function specialised for a 2x2 filter with 3-channel source and
+ * destination.  The four weights of the selected sub-sample phase
+ * apply, in order, to:
+ *   w1: row 0, pixel x_scaled        w2: row 0, pixel x_scaled + 1
+ *   w3: row 1, pixel x_scaled        w4: row 1, pixel x_scaled + 1
+ * With three bytes per pixel the second pixel of a row starts at byte
+ * offset 3.  Each output channel is the weighted sum >> 16.
+ *
+ * Returns the advanced dest pointer.
+ *
+ * NOTE(review): unlike PixopsLineFunc and the other line functions in
+ * this file, this signature has no dest_x parameter; confirm how it is
+ * invoked before calling it through a PixopsLineFunc pointer.
+ */
+static art_u8 *
+scale_line_22_33 (int *weights, int n_x, int n_y,
+                  art_u8 *dest, art_u8 *dest_end, int dest_channels, int dest_has_alpha,
+                  art_u8 **src, int src_channels, art_boolean src_has_alpha,
+                  int x_init, int x_step, int src_width,
+                  int check_size, art_u32 color1, art_u32 color2)
+{
+  int x = x_init;
+  art_u8 *src0 = src[0];
+  art_u8 *src1 = src[1];
+
+  while (dest < dest_end)
+    {
+      unsigned int r, g, b;
+      int x_scaled = x >> SCALE_SHIFT;
+      int *pixel_weights;
+      art_u8 *q0, *q1;
+      int w1, w2, w3, w4;
+
+      q0 = src0 + x_scaled * 3;
+      q1 = src1 + x_scaled * 3;
+
+      /* Byte offset of the phase's four weights: phase * 16 */
+      pixel_weights = (int *)((char *)weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4)));
+
+      w1 = pixel_weights[0];
+      w2 = pixel_weights[1];
+      w3 = pixel_weights[2];
+      w4 = pixel_weights[3];
+
+      r = w1 * q0[0];
+      g = w1 * q0[1];
+      b = w1 * q0[2];
+
+      r += w2 * q0[3];
+      g += w2 * q0[4];
+      b += w2 * q0[5];
+
+      r += w3 * q1[0];
+      g += w3 * q1[1];
+      b += w3 * q1[2];
+
+      /* Second pixel of row 1 occupies bytes 3..5, as in row 0 */
+      r += w4 * q1[3];
+      g += w4 * q1[4];
+      b += w4 * q1[5];
+
+      dest[0] = r >> 16;
+      dest[1] = g >> 16;
+      dest[2] = b >> 16;
+
+      dest += 3;
+
+      x += x_step;
+    }
+
+  return dest;
+}
+
+/* Compute a single destination pixel with full bounds checking.
+ *
+ * Applies the n_x by n_y grid in `weights' to the source rows in `src',
+ * beginning at source column x_start.  Columns outside [0, src_width)
+ * are replaced by the nearest edge column.  Each sample contributes
+ * with alpha * weight, where alpha is q[3] when the source has alpha
+ * and 0xff otherwise.  The accumulated r, g, b and a sums are passed to
+ * pixel_func together with the destination and checkerboard parameters.
+ */
+static void
+process_pixel (int *weights, int n_x, int n_y,
+              art_u8 *dest, int dest_x, int dest_channels, int dest_has_alpha,
+              art_u8 **src, int src_channels, art_boolean src_has_alpha,
+              int x_start, int src_width,
+              int check_size, art_u32 color1, art_u32 color2,
+              PixopsPixelFunc pixel_func)
+{
+  unsigned int sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
+  int row, col;
+
+  for (row = 0; row < n_y; row++)
+    {
+      int *row_weights = weights + n_x * row;
+
+      for (col = 0; col < n_x; col++)
+        {
+          int src_col = x_start + col;
+          int alpha;
+          unsigned int ta;
+          art_u8 *q;
+
+          /* Clamp to the nearest column that exists in the source */
+          if (src_col < 0)
+            src_col = 0;
+          else if (src_col >= src_width)
+            src_col = src_width - 1;
+
+          q = src[row] + src_col * src_channels;
+
+          alpha = src_has_alpha ? q[3] : 0xff;
+          ta = alpha * row_weights[col];
+
+          sum_r += ta * q[0];
+          sum_g += ta * q[1];
+          sum_b += ta * q[2];
+          sum_a += ta;
+        }
+    }
+
+  (*pixel_func) (dest, dest_x, dest_channels, dest_has_alpha, src_has_alpha, check_size, color1, color2, sum_r, sum_g, sum_b, sum_a);
+}
+/* Common driver for the filtered scale / composite operations.
+ *
+ * Walks the destination rows 0 .. render_y1 - render_y0 - 1 of dest_buf.
+ * Source positions are tracked in fixed point: x_step and y_step are
+ * (1 << SCALE_SHIFT) divided by the scale factors, and x >> SCALE_SHIFT
+ * (or y >> SCALE_SHIFT) is the integer source coordinate.
+ *
+ * For every row:
+ *  - the block of weights for the row's vertical subsample phase is
+ *    selected from filter->weights;
+ *  - line_bufs is pointed at filter->n_y consecutive source rows, with
+ *    row indices clamped to [0, src_height - 1];
+ *  - color1 and color2 are swapped depending on
+ *    ((i + check_y) >> check_shift) & 1;
+ *  - pixels whose first source column is negative go through
+ *    process_pixel, then line_func handles the run up to
+ *    MIN (outbuf_end, outbuf + run_end_index * dest_channels), and
+ *    whatever is left goes through process_pixel again.
+ *
+ * line_bufs is allocated on entry and freed before returning.
+ */
+static void
+pixops_process (art_u8         *dest_buf,
+               int             render_x0,
+               int             render_y0,
+               int             render_x1,
+               int             render_y1,
+               int             dest_rowstride,
+               int             dest_channels,
+               art_boolean     dest_has_alpha,
+               art_u8         *src_buf,
+               int             src_width,
+               int             src_height,
+               int             src_rowstride,
+               int             src_channels,
+               art_boolean     src_has_alpha,
+               double          scale_x,
+               double          scale_y,
+               int             check_x,
+               int             check_y,
+               int             check_size,
+               art_u32         color1,
+               art_u32         color2,
+               PixopsFilter   *filter,
+               PixopsLineFunc  line_func,
+               PixopsPixelFunc pixel_func)
+{
+  int i, j;
+  int x, y;
+  art_u8 **line_bufs = g_new (art_u8 *, filter->n_y); /* one source row pointer per vertical tap */
+
+  int x_step = (1 << SCALE_SHIFT) / scale_x; /* fixed-point source advance per destination pixel */
+  int y_step = (1 << SCALE_SHIFT) / scale_y;
+
+  int dest_x;
+
+  /* FIXME, this computation of run_end_index is not correct */
+  int run_end_index = ((src_width << SCALE_SHIFT) + (filter->n_x - 1) / 2 - filter->n_x) / x_step - render_x0;
+  int check_shift = check_size ? get_check_shift (check_size) : 0;
+
+  y = render_y0 * y_step + filter->y_offset * (1 << SCALE_SHIFT);
+  for (i = 0; i < (render_y1 - render_y0); i++)
+    {
+      int y_start = y >> SCALE_SHIFT;
+      int x_start;
+      int *run_weights = filter->weights + ((y >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * filter->n_x * filter->n_y * SUBSAMPLE;
+      art_u8 *new_outbuf;
+      art_u32 tcolor1, tcolor2;
+      
+      art_u8 *outbuf = dest_buf + dest_rowstride * i;
+      art_u8 *outbuf_end = outbuf + dest_channels * (render_x1 - render_x0);
+
+      /* Alternate the two colors from one band of rows to the next */
+      if (((i + check_y) >> check_shift) & 1)
+       {
+         tcolor1 = color2;
+         tcolor2 = color1;
+       }
+      else
+       {
+         tcolor1 = color1;
+         tcolor2 = color2;
+       }
+
+      /* Gather the source rows for this destination row, clamping at the
+       * top and bottom edges of the source image.
+       */
+      for (j=0; j<filter->n_y; j++)
+       {
+         if (y_start <  0)
+           line_bufs[j] = src_buf;
+         else if (y_start < src_height)
+           line_bufs[j] = src_buf + src_rowstride * y_start;
+         else
+           line_bufs[j] = src_buf + src_rowstride * (src_height - 1);
+
+         y_start++;
+       }
+
+      dest_x = check_x;
+      x = render_x0 * x_step + filter->x_offset * (1 << SCALE_SHIFT);
+      x_start = x >> SCALE_SHIFT;
+
+      /* Left edge: the filter would read columns before the source starts */
+      while (x_start < 0 && outbuf < outbuf_end)
+       {
+         process_pixel (run_weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * (filter->n_x * filter->n_y), filter->n_x, filter->n_y,
+                        outbuf, dest_x, dest_channels, dest_has_alpha,
+                        line_bufs, src_channels, src_has_alpha,
+                        x >> SCALE_SHIFT, src_width,
+                        check_size, tcolor1, tcolor2, pixel_func);
+         
+         x += x_step;
+         x_start = x >> SCALE_SHIFT;
+         dest_x++;
+         outbuf += dest_channels;
+       }
+
+      /* Middle of the row: handed to the line function in one call */
+      new_outbuf = (*line_func)(run_weights, filter->n_x, filter->n_y,
+                               outbuf, dest_x,
+                               MIN (outbuf_end, outbuf + run_end_index * dest_channels),
+                               dest_channels, dest_has_alpha,
+                               line_bufs, src_channels, src_has_alpha,
+                               x, x_step, src_width, check_size, tcolor1, tcolor2);
+
+      dest_x += (new_outbuf - outbuf) / dest_channels;
+      
+      /* NOTE(review): x is rebuilt from dest_x, which started at check_x
+       * rather than render_x0; this looks like it drops the render_x0
+       * offset whenever the two differ -- confirm against callers.
+       */
+      x = dest_x * x_step + filter->x_offset * (1 << SCALE_SHIFT);
+      outbuf = new_outbuf;
+
+      /* Right edge: whatever the line function did not cover */
+      while (outbuf < outbuf_end)
+       {
+         process_pixel (run_weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * (filter->n_x * filter->n_y), filter->n_x, filter->n_y,
+                        outbuf, dest_x, dest_channels, dest_has_alpha,
+                        line_bufs, src_channels, src_has_alpha,
+                        x >> SCALE_SHIFT, src_width,
+                        check_size, tcolor1, tcolor2, pixel_func);
+         
+         x += x_step;
+         dest_x++;
+         outbuf += dest_channels;
+       }
+
+      y += y_step;
+    }
+
+  g_free (line_bufs);
+}
+
+/* Fill `filter' with the weight tables for the tiles filter.
+ *
+ * The filter spans n_x = ceil (1/x_scale + 1) by n_y = ceil (1/y_scale + 1)
+ * source pixels, with both offsets zero.  One n_x * n_y table is built
+ * for each of the SUBSAMPLE * SUBSAMPLE subsample phases.  Within a
+ * table, the weight of source pixel (col, row) is the length of overlap
+ * between [col, col + 1] and [x, x + 1/x_scale], times the same overlap
+ * vertically, multiplied by 65536, both scale factors and overall_alpha,
+ * then converted to int.
+ *
+ * filter->weights is allocated here; releasing it is left to the caller.
+ */
+static void
+tile_make_weights (PixopsFilter *filter, double x_scale, double y_scale, double overall_alpha)
+{
+  int y_phase, x_phase;
+  int n_x = ceil(1/x_scale + 1);
+  int n_y = ceil(1/y_scale + 1);
+
+  filter->x_offset = 0;
+  filter->y_offset = 0;
+  filter->n_x = n_x;
+  filter->n_y = n_y;
+  filter->weights = g_new (int, SUBSAMPLE * SUBSAMPLE * n_x * n_y);
+
+  for (y_phase = 0; y_phase < SUBSAMPLE; y_phase++)
+    {
+      for (x_phase = 0; x_phase < SUBSAMPLE; x_phase++)
+        {
+          int *table = filter->weights + ((y_phase*SUBSAMPLE) + x_phase) * n_x * n_y;
+          /* Phase as a fraction of a pixel; the divisor is hard-coded */
+          double x = (double)x_phase / 16;
+          double y = (double)y_phase / 16;
+          int row, col;
+
+          for (row = 0; row < n_y; row++)
+            {
+              double th = 0;
+
+              /* Vertical overlap of source row `row' with [y, y + 1/y_scale] */
+              if (row < y)
+                {
+                  if (row + 1 > y)
+                    th = MIN(row+1, y + 1/y_scale) - y;
+                }
+              else if (y + 1/y_scale > row)
+                th = MIN(row+1, y + 1/y_scale) - row;
+
+              for (col = 0; col < n_x; col++)
+                {
+                  double tw = 0;
+
+                  /* Horizontal overlap, same construction */
+                  if (col < x)
+                    {
+                      if (col + 1 > x)
+                        tw = MIN(col+1, x + 1/x_scale) - x;
+                    }
+                  else if (x + 1/x_scale > col)
+                    tw = MIN(col+1, x + 1/x_scale) - col;
+
+                  table[n_x * row + col] = 65536 * tw * x_scale * th * y_scale * overall_alpha;
+                }
+            }
+        }
+    }
+}
+/* Fill `filter' with separable weights for the fast bilinear filter.
+ *
+ * Each axis is set up independently:
+ *  - scale > 1.0: two taps, offset 0.5 * (1/scale - 1), and weights of
+ *    (1 - phase) and phase, each divided by the scale;
+ *  - otherwise: ceil (1 + 1/scale) taps, offset 0, and weights equal to
+ *    the overlap of each source pixel with [phase, phase + 1/scale].
+ *
+ * For every one of the SUBSAMPLE * SUBSAMPLE subsample phases, the table
+ * entry for tap (j, i) is
+ *   65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha
+ * converted to int.
+ *
+ * filter->weights is allocated here and left for the caller to release;
+ * the temporary per-axis arrays are freed before returning.
+ */
+static void
+bilinear_make_fast_weights (PixopsFilter *filter, double x_scale, double y_scale, double overall_alpha)
+{
+  int i_offset, j_offset;
+  double *x_weights, *y_weights;
+  int n_x, n_y;
+
+  if (x_scale > 1.0)           /* Bilinear */
+    {
+      n_x = 2;
+      filter->x_offset = 0.5 * (1/x_scale - 1);
+    }
+  else                         /* Tile */
+    {
+      n_x = ceil(1.0 + 1.0/x_scale);
+      filter->x_offset = 0.0;
+    }
+
+  if (y_scale > 1.0)           /* Bilinear */
+    {
+      n_y = 2;
+      filter->y_offset = 0.5 * (1/y_scale - 1);
+    }
+  else                         /* Tile */
+    {
+      n_y = ceil(1.0 + 1.0/y_scale);
+      filter->y_offset = 0.0;
+    }
+
+  filter->n_y = n_y;
+  filter->n_x = n_x;
+  filter->weights = g_new (int, SUBSAMPLE * SUBSAMPLE * n_x * n_y);
+
+  x_weights = g_new (double, n_x);
+  y_weights = g_new (double, n_y);
+
+  for (i_offset=0; i_offset<SUBSAMPLE; i_offset++)
+    for (j_offset=0; j_offset<SUBSAMPLE; j_offset++)
+      {
+       int *pixel_weights = filter->weights + ((i_offset*SUBSAMPLE) + j_offset) * n_x * n_y;
+       double x = (double)j_offset / 16; /* phase as a fraction; 16 is presumably SUBSAMPLE -- confirm */
+       double y = (double)i_offset / 16;
+       int i,j;
+
+       if (x_scale > 1.0)      /* Bilinear */
+         {
+           for (i = 0; i < n_x; i++)
+             {
+               x_weights[i] = ((i == 0) ? (1 - x) : x) / x_scale;
+             }
+         }
+       else                    /* Tile */
+         {
+           for (i = 0; i < n_x; i++)
+             {
+               if (i < x)
+                 {
+                   if (i + 1 > x)
+                     x_weights[i] = MIN(i+1, x + 1/x_scale) - x;
+                   else
+                     x_weights[i] = 0;
+                 }
+               else
+                 {
+                   if (x + 1/x_scale > i)
+                     x_weights[i] = MIN(i+1, x + 1/x_scale) - i;
+                   else
+                     x_weights[i] = 0;
+                 }
+             }
+         }
+
+       if (y_scale > 1.0)      /* Bilinear */
+         {
+           for (i = 0; i < n_y; i++)
+             {
+               y_weights[i] = ((i == 0) ? (1 - y) : y) / y_scale;
+             }
+         }
+       else                    /* Tile */
+         {
+           for (i = 0; i < n_y; i++)
+             {
+               if (i < y)
+                 {
+                   if (i + 1 > y)
+                     y_weights[i] = MIN(i+1, y + 1/y_scale) - y;
+                   else
+                     y_weights[i] = 0;
+                 }
+               else
+                 {
+                   if (y + 1/y_scale > i)
+                     y_weights[i] = MIN(i+1, y + 1/y_scale) - i;
+                   else
+                     y_weights[i] = 0;
+                 }
+             }
+         }
+
+       /* Combine the per-axis weights into the 2-D fixed-point table */
+       for (i = 0; i < n_y; i++)
+         for (j = 0; j < n_x; j++)
+           *(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha;
+      }
+
+  g_free (x_weights);
+  g_free (y_weights);
+}
+
+/* Clip the rectangle [bx0, bx1] x [by0, by1] against the unit square
+ * [0, 1] x [0, 1] and return
+ *   0.25 * (x1*x1 - x0*x0) * (y1*y1 - y0*y0)
+ * for the clipped rectangle [x0, x1] x [y0, y1], which is the integral
+ * of x * y over it.  Returns 0 when the rectangle misses the unit
+ * square on either axis.
+ */
+static double
+bilinear_quadrant (double bx0, double bx1, double by0, double by1)
+{
+  double x0, x1, y0, y1;
+
+  /* Horizontal clip */
+  if (0. < bx0)
+    {
+      if (!(1. > bx0))
+        return 0;
+      x0 = bx0;
+    }
+  else
+    {
+      if (!(bx1 > 0.))
+        return 0;
+      x0 = 0.;
+    }
+  x1 = MIN (1., bx1);
+
+  /* Vertical clip */
+  if (0. < by0)
+    {
+      if (!(1. > by0))
+        return 0;
+      y0 = by0;
+    }
+  else
+    {
+      if (!(by1 > 0.))
+        return 0;
+      y0 = 0.;
+    }
+  y1 = MIN (1., by1);
+
+  return 0.25 * (x1*x1 - x0*x0) * (y1*y1 - y0*y0);
+}
+/* Fill `filter' with the weight tables used for ART_FILTER_HYPER.
+ *
+ * The filter spans ceil (1/scale + 2) source pixels on each axis and
+ * both offsets are -1.  For each of the SUBSAMPLE * SUBSAMPLE subsample
+ * phases and each tap (j, i), the weight is the sum of four
+ * bilinear_quadrant () terms, multiplied by
+ * 65536 * x_scale * y_scale * overall_alpha and converted to int.
+ *
+ * filter->weights is allocated here; releasing it is left to the caller.
+ */
+static void
+bilinear_make_weights (PixopsFilter *filter, double x_scale, double y_scale, double overall_alpha)
+{
+  int i_offset, j_offset;
+
+  int n_x = ceil(1/x_scale + 2.0);
+  int n_y = ceil(1/y_scale + 2.0);
+
+  filter->x_offset = -1.0;
+  filter->y_offset = -1.0;
+  filter->n_x = n_x;
+  filter->n_y = n_y;
+  
+  filter->weights = g_new (int, SUBSAMPLE * SUBSAMPLE * n_x * n_y);
+
+  for (i_offset=0; i_offset<SUBSAMPLE; i_offset++)
+    for (j_offset=0; j_offset<SUBSAMPLE; j_offset++)
+      {
+       int *pixel_weights = filter->weights + ((i_offset*SUBSAMPLE) + j_offset) * n_x * n_y;
+       double x = (double)j_offset / 16; /* phase as a fraction; 16 is presumably SUBSAMPLE -- confirm */
+       double y = (double)i_offset / 16;
+       int i,j;
+         
+       for (i = 0; i < n_y; i++)
+         for (j = 0; j < n_x; j++)
+           {
+             double w;
+
+             /* One term for each combination of the two x ranges and the two y ranges */
+             w = bilinear_quadrant  (0.5 + j - (x + 1 / x_scale), 0.5 + j - x, 0.5 + i - (y + 1 / y_scale), 0.5 + i - y);
+             w += bilinear_quadrant (1.5 + x - j, 1.5 + (x + 1 / x_scale) - j, 0.5 + i - (y + 1 / y_scale), 0.5 + i - y);
+             w += bilinear_quadrant (0.5 + j - (x + 1 / x_scale), 0.5 + j - x, 1.5 + y - i, 1.5 + (y + 1 / y_scale) - i);
+             w += bilinear_quadrant (1.5 + x - j, 1.5 + (x + 1 / x_scale) - j, 1.5 + y - i, 1.5 + (y + 1 / y_scale) - i);
+             
+             *(pixel_weights + n_x * i + j) = 65536 * w * x_scale * y_scale * overall_alpha;
+           }
+      }
+}
+
+void
+pixops_composite_color (art_u8         *dest_buf,
+                       int             render_x0,
+                       int             render_y0,
+                       int             render_x1,
+                       int             render_y1,
+                       int             dest_rowstride,
+                       int             dest_channels,
+                       art_boolean     dest_has_alpha,
+                       art_u8         *src_buf,
+                       int             src_width,
+                       int             src_height,
+                       int             src_rowstride,
+                       int             src_channels,
+                       art_boolean     src_has_alpha,
+                       double          scale_x,
+                       double          scale_y,
+                       ArtFilterLevel  filter_level,
+                       int             overall_alpha,
+                       int             check_x,
+                       int             check_y,
+                       int             check_size,
+                       art_u32         color1,
+                       art_u32         color2)
+{
+  PixopsFilter filter;
+  PixopsLineFunc line_func;
+  
+#ifdef USE_MMX
+  art_boolean found_mmx = pixops_have_mmx();
+#endif
+
+  g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
+  g_return_if_fail (!(src_channels == 3 && src_has_alpha));
+
+  if (!src_has_alpha && overall_alpha == 255)
+    pixops_scale (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                 dest_rowstride, dest_channels, dest_has_alpha,
+                 src_buf, src_width, src_height, src_rowstride, src_channels,
+                 src_has_alpha, scale_x, scale_y, filter_level);
+
+  switch (filter_level)
+    {
+    case ART_FILTER_NEAREST:
+      pixops_composite_color_nearest (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                                     dest_rowstride, dest_channels, dest_has_alpha,
+                                     src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha,
+                                     scale_x, scale_y, overall_alpha,
+                                     check_x, check_y, check_size, color1, color2);
+      return;
+
+    case ART_FILTER_TILES:
+      tile_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.);
+      break;
+      
+    case ART_FILTER_BILINEAR:
+      bilinear_make_fast_weights (&filter, scale_x, scale_y, overall_alpha / 255.);
+      break;
+      
+    case ART_FILTER_HYPER:
+      bilinear_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.);
+      break;
+    }
+
+#ifdef USE_MMX
+  if (filter.n_x == 2 && filter.n_y == 2 &&
+      dest_channels == 4 && src_channels == 4 && src_has_alpha && !dest_has_alpha && found_mmx)
+    line_func = composite_line_color_22_4a4_mmx_stub;
+  else
+#endif    
+    line_func = composite_line_color;
+  
+  pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                 dest_rowstride, dest_channels, dest_has_alpha,
+                 src_buf, src_width, src_height, src_rowstride, src_channels,
+                 src_has_alpha, scale_x, scale_y, check_x, check_y, check_size, color1, color2,
+                 &filter, line_func, composite_pixel_color);
+
+  g_free (filter.weights);
+}
+
+void
+pixops_composite (art_u8        *dest_buf,
+                 int            render_x0,
+                 int            render_y0,
+                 int            render_x1,
+                 int            render_y1,
+                 int            dest_rowstride,
+                 int            dest_channels,
+                 art_boolean    dest_has_alpha,
+                 art_u8        *src_buf,
+                 int            src_width,
+                 int            src_height,
+                 int            src_rowstride,
+                 int            src_channels,
+                 art_boolean    src_has_alpha,
+                 double         scale_x,
+                 double         scale_y,
+                 ArtFilterLevel filter_level,
+                 int            overall_alpha)
+{
+  PixopsFilter filter;
+  PixopsLineFunc line_func;
+  
+#ifdef USE_MMX
+  art_boolean found_mmx = pixops_have_mmx();
+#endif
+
+  g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
+  g_return_if_fail (!(src_channels == 3 && src_has_alpha));
+
+  if (!src_has_alpha && overall_alpha == 255)
+    pixops_scale (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                 dest_rowstride, dest_channels, dest_has_alpha,
+                 src_buf, src_width, src_height, src_rowstride, src_channels,
+                 src_has_alpha, scale_x, scale_y, filter_level);
+
+  switch (filter_level)
+    {
+    case ART_FILTER_NEAREST:
+      pixops_composite_nearest (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                               dest_rowstride, dest_channels, dest_has_alpha,
+                               src_buf, src_width, src_height, src_rowstride, src_channels,
+                               src_has_alpha, scale_x, scale_y, overall_alpha);
+      return;
+
+    case ART_FILTER_TILES:
+      tile_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.);
+      break;
+      
+    case ART_FILTER_BILINEAR:
+      bilinear_make_fast_weights (&filter, scale_x, scale_y, overall_alpha / 255.);
+      break;
+      
+    case ART_FILTER_HYPER:
+      bilinear_make_weights (&filter, scale_x, scale_y, overall_alpha / 255.);
+      break;
+    }
+
+  if (filter.n_x == 2 && filter.n_y == 2 &&
+      dest_channels == 4 && src_channels == 4 && src_has_alpha && !dest_has_alpha)
+    {
+#ifdef USE_MMX
+      if (found_mmx)
+       line_func = composite_line_22_4a4_mmx_stub;
+      else
+#endif 
+       line_func = composite_line_22_4a4;
+    }
+  else
+    line_func = composite_line;
+  
+  pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                 dest_rowstride, dest_channels, dest_has_alpha,
+                 src_buf, src_width, src_height, src_rowstride, src_channels,
+                 src_has_alpha, scale_x, scale_y, 0, 0, 0, 0, 0, 
+                 &filter, line_func, composite_pixel);
+
+  g_free (filter.weights);
+}
+/* Scale the source image and store the rows render_y0 .. render_y1 - 1
+ * and columns render_x0 .. render_x1 - 1 of the scaled image in dest_buf.
+ *
+ * Rejected up front: 3 channels combined with alpha (source or
+ * destination), and a source with alpha going to a destination without.
+ * ART_FILTER_NEAREST is forwarded to pixops_scale_nearest (); the other
+ * filter levels build a weight table with an overall alpha of 1.0 and
+ * run pixops_process () with zeros for the checkerboard parameters.
+ *
+ * NOTE(review): the switch has no default case, so any other
+ * filter_level value reaches the code below with `filter' unset.
+ */
+void
+pixops_scale (art_u8        *dest_buf,
+             int            render_x0,
+             int            render_y0,
+             int            render_x1,
+             int            render_y1,
+             int            dest_rowstride,
+             int            dest_channels,
+             art_boolean    dest_has_alpha,
+             art_u8        *src_buf,
+             int            src_width,
+             int            src_height,
+             int            src_rowstride,
+             int            src_channels,
+             art_boolean    src_has_alpha,
+             double         scale_x,
+             double         scale_y,
+             ArtFilterLevel filter_level)
+{
+  PixopsFilter filter;
+  PixopsLineFunc line_func;
+
+#ifdef USE_MMX
+  art_boolean found_mmx = pixops_have_mmx();
+#endif
+
+  g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
+  g_return_if_fail (!(src_channels == 3 && src_has_alpha));
+  g_return_if_fail (!(src_has_alpha && !dest_has_alpha));
+
+  switch (filter_level)
+    {
+    case ART_FILTER_NEAREST:
+      pixops_scale_nearest (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                           dest_rowstride, dest_channels, dest_has_alpha,
+                           src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha,
+                           scale_x, scale_y);
+      return;
+
+    case ART_FILTER_TILES:
+      tile_make_weights (&filter, scale_x, scale_y, 1.0);
+      break;
+      
+    case ART_FILTER_BILINEAR:
+      bilinear_make_fast_weights (&filter, scale_x, scale_y, 1.0);
+      break;
+      
+    case ART_FILTER_HYPER:
+      bilinear_make_weights (&filter, scale_x, scale_y, 1.0);
+      break;
+    }
+
+#ifdef USE_MMX
+  if (filter.n_x == 2 && filter.n_y == 2 &&
+      found_mmx && dest_channels == 3 && src_channels == 3) /* MMX path: 2x2 filter, 3 channels on both sides */
+    line_func = scale_line_22_33_mmx_stub;
+  else
+#endif    
+    line_func = scale_line;
+  
+  pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
+                 dest_rowstride, dest_channels, dest_has_alpha,
+                 src_buf, src_width, src_height, src_rowstride, src_channels,
+                 src_has_alpha, scale_x, scale_y, 0, 0, 0, 0, 0,
+                 &filter, line_func, scale_pixel);
+
+  g_free (filter.weights);
+}
+
diff --git a/gdk-pixbuf/pixops/pixops.h b/gdk-pixbuf/pixops/pixops.h
new file mode 100644 (file)
index 0000000..387b46b
--- /dev/null
@@ -0,0 +1,80 @@
+#include <libart_lgpl/art_misc.h>
+#include <libart_lgpl/art_filterlevel.h>
+
+/* Scale src_buf (src_width by src_height pixels) by the factors
+ * scale_x, scale_y and composite the portion of the scaled image
+ * bounded by render_x0, render_y0 (inclusive) and render_x1, render_y1
+ * (exclusive), given in the scaled coordinate system, onto dest_buf
+ * starting at 0, 0.  overall_alpha ranges from 0 to 255.
+ */
+void pixops_composite (art_u8         *dest_buf,
+                      int             render_x0,
+                      int             render_y0,
+                      int             render_x1,
+                      int             render_y1,
+                      int             dest_rowstride,
+                      int             dest_channels,
+                      int             dest_has_alpha,
+                      art_u8         *src_buf,
+                      int             src_width,
+                      int             src_height,
+                      int             src_rowstride,
+                      int             src_channels,
+                      int             src_has_alpha,
+                      double          scale_x,
+                      double          scale_y,
+                      ArtFilterLevel  filter_level,
+                      int             overall_alpha);
+
+/* Scale src_buf (src_width by src_height pixels) by the factors
+ * scale_x, scale_y and composite the portion of the scaled image
+ * bounded by render_x0, render_y0 (inclusive) and render_x1, render_y1
+ * (exclusive), given in the scaled coordinate system, against a
+ * checkerboard with checks of size check_size in the colors color1 and
+ * color2, writing the result into dest_buf starting at 0, 0.
+ * overall_alpha ranges from 0 to 255.
+ */
+void pixops_composite_color (art_u8         *dest_buf,
+                            int             render_x0,
+                            int             render_y0,
+                            int             render_x1,
+                            int             render_y1,
+                            int             dest_rowstride,
+                            int             dest_channels,
+                            int             dest_has_alpha,
+                            art_u8         *src_buf,
+                            int             src_width,
+                            int             src_height,
+                            int             src_rowstride,
+                            int             src_channels,
+                            int             src_has_alpha,
+                            double          scale_x,
+                            double          scale_y,
+                            ArtFilterLevel  filter_level,
+                            int             overall_alpha,
+                            int             check_x,
+                            int             check_y,
+                            int             check_size,
+                            art_u32         color1,
+                            art_u32         color2);
+
+/* Scale src_buf (src_width by src_height pixels) by the factors
+ * scale_x, scale_y and store the portion of the scaled image bounded
+ * by render_x0, render_y0 (inclusive) and render_x1, render_y1
+ * (exclusive), given in the scaled coordinate system, in dest_buf
+ * starting at 0, 0.
+ */
+void pixops_scale     (art_u8         *dest_buf,
+                      int             render_x0,
+                      int             render_y0,
+                      int             render_x1,
+                      int             render_y1,
+                      int             dest_rowstride,
+                      int             dest_channels,
+                      int             dest_has_alpha,
+                      art_u8         *src_buf,
+                      int             src_width,
+                      int             src_height,
+                      int             src_rowstride,
+                      int             src_channels,
+                      int             src_has_alpha,
+                      double          scale_x,
+                      double          scale_y,
+                      ArtFilterLevel  filter_level);
+
diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
new file mode 100644 (file)
index 0000000..e253fc1
--- /dev/null
@@ -0,0 +1,152 @@
+       .file   "scale_line_22_33_mmx.S"
+       .version        "01.01"
+gcc2_compiled.:
+.text
+       .align 16
+.globl pixops_scale_line_22_33_mmx
+       .type    pixops_scale_line_22_33_mmx,@function
+/*
+ * Scale one line of 3-byte pixels with a 2x2 filter using MMX.
+ *
+ * Arguments
+ *
+ * weights:     8(%ebp)        table of 32-byte entries, one per x phase
+ * p:          12(%ebp)        %esi, destination pointer
+ * q1:         16(%ebp)        first source row
+ * q2:         20(%ebp)        second source row
+ * xstep:       24(%ebp)       16.16 fixed-point advance per output pixel
+ * p_end:       28(%ebp)       end of destination
+ * xinit:       32(%ebp)       initial 16.16 fixed-point x
+ *
+ * Returns the final destination pointer in %eax.
+ *
+ * %mm0 / %mm2 hold the left pixel of rows q1 / q2 and %mm1 / %mm3 the
+ * right pixel, each unpacked to 16-bit words.  The loop exits only
+ * when p compares equal to p_end.
+*/
+pixops_scale_line_22_33_mmx:
+/*
+ * Function call entry
+ */
+       pushl %ebp
+       movl %esp,%ebp
+       subl $28,%esp
+       pushl %edi
+       pushl %esi
+       pushl %ebx
+/* Locals:
+ * int x                      %ebx
+ * int x_scaled             -24(%ebp)
+ */
+
+/*
+ * Setup
+ */
+/* Initialize variables */
+       movl 32(%ebp),%ebx
+       movl 32(%ebp),%edx
+       sarl $16,%edx
+       movl 12(%ebp),%esi
+
+       cmpl %esi,28(%ebp)
+       je   .out
+
+/* Load initial values into %mm1, %mm3: three bytes from each row at
+ * byte offset 3 * x_scaled, widened to words against the zeroed %mm4 */
+       leal (%edx,%edx,2),%edx  # Multiply by 3
+
+       movl 16(%ebp),%edi
+       pxor %mm4, %mm4
+       movzbl 2(%edi,%edx),%ecx
+       shll $16,%ecx
+       movzwl (%edi,%edx),%eax
+       orl %eax,%ecx
+       movd %ecx, %mm1
+       punpcklbw %mm4, %mm1
+
+       movl 20(%ebp),%edi
+       movzbl 2(%edi,%edx),%ecx
+       shll $16,%ecx
+       movzwl (%edi,%edx),%eax
+       orl %eax,%ecx
+       movd %ecx, %mm3
+       punpcklbw %mm4, %mm3
+
+/* From here on x runs one whole pixel (65536) ahead, so the pixel
+ * fetched in .newx is the right-hand neighbour */
+       addl $65536,%ebx
+       movl %ebx,%edx
+       sarl $16,%edx
+
+       jmp .newx
+       .p2align 4,,7
+.loop:
+/* Byte offset of the weight entry: ((x & 0xf000) >> 12) * 32,
+ * computed in one step as (x & 0xf000) >> 7 */
+       movl %ebx,%eax
+       andl $0xf000,%eax
+       shrl $7,%eax
+
+/* Multiply the four pixels by the four 8-byte weight groups and sum */
+       movq (%edi,%eax),%mm4
+       pmullw %mm0,%mm4
+       movq 8(%edi,%eax),%mm5
+       pmullw %mm1,%mm5
+       movq 16(%edi,%eax),%mm6
+       movq 24(%edi,%eax),%mm7
+       pmullw %mm2,%mm6
+       pmullw %mm3,%mm7
+       paddw %mm4, %mm5
+       paddw %mm6, %mm7
+       paddw %mm5, %mm7
+
+/* Drop the low 8 bits of each word, pack to bytes, store three bytes */
+       psrlw $8, %mm7
+       packuswb %mm7, %mm7
+       movd %mm7, %eax
+       
+       movb %al, (%esi)
+       shrl $8, %eax
+       movw %ax, 1(%esi)
+       addl $3, %esi
+               
+       cmpl %esi,28(%ebp)
+       je   .out
+
+/* x += x_step; */
+       addl 24(%ebp),%ebx
+/* x_scaled = x >> 16; */
+       movl %ebx,%edx
+       sarl $16,%edx
+
+/* Same source column as before: reuse the pixels already loaded */
+       cmpl %edx,-24(%ebp)
+       je   .loop
+
+.newx:
+       movl %edx,-24(%ebp)
+/*
+ * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
+ */
+       movq %mm1, %mm0
+       movq %mm3, %mm2
+       
+       leal (%edx,%edx,2),%edx  # Multiply by 3
+
+       movl 16(%ebp),%edi
+       pxor %mm4, %mm4
+       movzbl 2(%edi,%edx),%ecx
+       shll $16,%ecx
+       movzwl (%edi,%edx),%eax
+       orl %eax,%ecx
+       movd %ecx, %mm1
+       punpcklbw %mm4, %mm1
+
+       movl 20(%ebp),%edi
+       movzbl 2(%edi,%edx),%ecx
+       shll $16,%ecx
+       movzwl (%edi,%edx),%eax
+       orl %eax,%ecx
+       movd %ecx, %mm3
+       punpcklbw %mm4, %mm3
+       
+       movl 8(%ebp),%edi        /* %edi = weights for the multiply in .loop */
+       
+       jmp .loop
+
+.out:
+       movl %esi,%eax
+       emms
+       leal -40(%ebp),%esp      /* 28 bytes of locals + 3 saved registers */
+       popl %ebx
+       popl %esi
+       popl %edi
+       movl %ebp,%esp
+       popl %ebp
+       ret
diff --git a/gdk-pixbuf/pixops/timescale.c b/gdk-pixbuf/pixops/timescale.c
new file mode 100644 (file)
index 0000000..1abefb8
--- /dev/null
@@ -0,0 +1,227 @@
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "pixops.h"
+
+/* Time of day captured by the most recent start_timing() call. */
+struct timeval start_time;
+
+/* Store the current time of day in start_time; stop_timing() reads it. */
+void
+start_timing (void)
+{
+  (void) gettimeofday (&start_time, NULL);
+}
+
+/*
+ * Print one result row for the interval that began at the last
+ * start_timing() call and return the value shown in the row's last column.
+ *
+ * test:       label printed at the start of the row
+ * iterations: number of repetitions that were timed
+ * bytes:      amount of work per repetition.  NOTE(review): the callers in
+ *             this file pass dest_height * dest_width, i.e. a pixel count
+ *             rather than a byte count.
+ *
+ * Returns bytes * iterations / (1000 * elapsed milliseconds).
+ */
+double
+stop_timing (const char *test, int iterations, int bytes)
+{
+  struct timeval now;
+  long whole_secs, frac_usecs;
+  double msecs, rate;
+
+  gettimeofday (&now, NULL);
+
+  whole_secs = now.tv_sec - start_time.tv_sec;
+  frac_usecs = now.tv_usec - start_time.tv_usec;
+  if (frac_usecs < 0)
+    {
+      /* Borrow one second so the microsecond part is non-negative. */
+      frac_usecs += 1000000;
+      whole_secs -= 1;
+    }
+
+  msecs = whole_secs * 1000. + frac_usecs / 1000.;
+  rate = ((double)bytes * iterations) / (1000*msecs);
+
+  printf("%s%d\t%.1f\t\t%.2f\t\t%.2f\n",
+        test, iterations, msecs, msecs / iterations, rate);
+
+  return rate;
+}
+
+/*
+ * Set every entry of a 3x3x4 results table to -1.
+ *
+ * times: table to fill; all 36 entries are overwritten.
+ */
+void
+init_array (double times[3][3][4])
+{
+  int i, j, k;
+
+  for (i = 0; i < 3; i++)
+    for (j = 0; j < 3; j++)
+      /* The bound must test k: testing j here never becomes false, so the
+       * loop would run past the end of the table.
+       */
+      for (k = 0; k < 4; k++)
+        times[i][j][k] = -1;
+}
+
+/*
+ * Print a 3x3x4 results table to stdout.
+ *
+ * A header line names the three columns (second array index).  Each value
+ * of the first index then produces four lines, one per value of the third
+ * index; the first of those four carries the row label and every line ends
+ * with the name of the matching ART_FILTER_* level, when there is one.
+ *
+ * times: table to print; every entry is read.
+ */
+void
+dump_array (double times[3][3][4])
+{
+  static const char *const row_labels[3] = { "3  ", "4  ", "4a " };
+  int row, level;
+
+  printf("        3\t4\t4a\n");
+
+  for (row = 0; row < 3; row++)
+    for (level = 0; level < 4; level++)
+      {
+        const char *suffix = NULL;
+
+        /* Only the first line of each group of four shows the label. */
+        printf("%s", level == 0 ? row_labels[row] : "   ");
+
+        printf("%6.2f  %6.2f   %6.2f",
+               times[row][0][level], times[row][1][level], times[row][2][level]);
+
+        if (level == ART_FILTER_NEAREST)
+          suffix = "  NEAREST\n";
+        else if (level == ART_FILTER_TILES)
+          suffix = "  TILES\n";
+        else if (level == ART_FILTER_BILINEAR)
+          suffix = "  BILINEAR\n";
+        else if (level == ART_FILTER_HYPER)
+          suffix = "  HYPER\n";
+
+        if (suffix != NULL)
+          printf ("%s", suffix);
+      }
+
+  printf("\n");
+}
+
+#define ITERS 10
+
+/*
+ * Benchmark driver: times pixops_scale, pixops_composite and
+ * pixops_composite_color for every combination of source layout,
+ * destination layout and filter level, printing a row per measurement and
+ * then one summary table per operation.
+ *
+ * Usage: scale [src_width src_height dest_width dest_height]
+ * With no arguments a 343x343 source and a 711x711 destination are used.
+ *
+ * Layout indices (src_index / dest_index):
+ *   0 - 3 channels, no alpha
+ *   1 - 4 channels, no alpha
+ *   2 - 4 channels, with alpha
+ *
+ * Returns 0 on success; exits with status 1 on bad arguments or when a
+ * buffer cannot be allocated.
+ */
+int main (int argc, char **argv)
+{
+  int src_width, src_height, dest_width, dest_height;
+  char *src_buf, *dest_buf;
+  int src_index, dest_index;
+  int i;
+  double scale_times[3][3][4];
+  double composite_times[3][3][4];
+  double composite_color_times[3][3][4];
+
+  if (argc == 5)
+    {
+      src_width = atoi(argv[1]);
+      src_height = atoi(argv[2]);
+      dest_width = atoi(argv[3]);
+      dest_height = atoi(argv[4]);
+    }
+  else if (argc == 1)
+    {
+      src_width = 343;
+      src_height = 343;
+      dest_width = 711;
+      dest_height = 711;
+    }
+  else
+    {
+      fprintf (stderr, "Usage: scale [src_width src_height dest_width dest_height]\n");
+      exit(1);
+    }
+
+  /* The dimensions are used as divisors and as allocation sizes below, so
+   * zero or negative values (including unparsable arguments, for which
+   * atoi yields 0) are rejected up front.
+   */
+  if (src_width <= 0 || src_height <= 0 || dest_width <= 0 || dest_height <= 0)
+    {
+      fprintf (stderr, "All dimensions must be positive integers\n");
+      exit(1);
+    }
+
+  printf ("Scaling from (%d, %d) to (%d, %d)\n\n", src_width, src_height, dest_width, dest_height);
+
+  for (src_index = 0; src_index < 3; src_index++)
+    for (dest_index = 0; dest_index < 3; dest_index++)
+      {
+        int src_channels = (src_index == 0) ? 3 : 4;
+        int src_has_alpha = (src_index == 2);
+        int dest_channels = (dest_index == 0) ? 3 : 4;
+        int dest_has_alpha = (dest_index == 2);
+
+        /* Row strides are rounded up to a multiple of four bytes. */
+        int src_rowstride = (src_channels*src_width + 3) & ~3;
+        int dest_rowstride = (dest_channels *dest_width + 3) & ~3;
+
+        size_t src_size = (size_t) src_rowstride * src_height;
+        size_t dest_size = (size_t) dest_rowstride * dest_height;
+
+        int filter_level;
+
+        src_buf = malloc(src_size);
+        dest_buf = malloc(dest_size);
+        if (src_buf == NULL || dest_buf == NULL)
+          {
+            fprintf (stderr, "Unable to allocate image buffers\n");
+            exit(1);
+          }
+        memset (src_buf, 0x80, src_size);
+        memset (dest_buf, 0x80, dest_size);
+
+        for (filter_level = ART_FILTER_NEAREST ; filter_level <= ART_FILTER_HYPER; filter_level++)
+          {
+            printf ("src_channels = %d (%s); dest_channels = %d (%s); filter_level=",
+                    src_channels, src_has_alpha ? "alpha" : "no alpha",
+                    dest_channels, dest_has_alpha ? "alpha" : "no alpha");
+            switch (filter_level)
+              {
+              case ART_FILTER_NEAREST:
+                printf ("ART_FILTER_NEAREST\n");
+                break;
+              case ART_FILTER_TILES:
+                printf ("ART_FILTER_TILES\n");
+                break;
+              case ART_FILTER_BILINEAR:
+                printf ("ART_FILTER_BILINEAR\n");
+                break;
+              case ART_FILTER_HYPER:
+                printf ("ART_FILTER_HYPER\n");
+                break;
+              }
+
+            printf("\t\t\titers\ttotal\t\tmsecs/iter\tMpixels/sec\t\n");
+
+
+            /* Plain scaling is skipped when the source has alpha but the
+             * destination does not.
+             */
+            if (!(src_has_alpha && !dest_has_alpha))
+              {
+                start_timing ();
+                for (i = 0; i < ITERS; i++)
+                  {
+                    pixops_scale (dest_buf, 0, 0, dest_width, dest_height, dest_rowstride, dest_channels, dest_has_alpha,
+                                  src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha,
+                                  (double)dest_width / src_width, (double)dest_height / src_height,
+                                  filter_level);
+                  }
+                scale_times[src_index][dest_index][filter_level] =
+                  stop_timing ("   scale\t\t", ITERS, dest_height * dest_width);
+              }
+            else
+              {
+                /* Nothing was measured; store a definite value so that
+                 * dump_array does not print an uninitialized entry.
+                 */
+                scale_times[src_index][dest_index][filter_level] = -1;
+              }
+
+            start_timing ();
+            for (i = 0; i < ITERS; i++)
+              {
+                pixops_composite (dest_buf, 0, 0, dest_width, dest_height, dest_rowstride, dest_channels, dest_has_alpha,
+                              src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha,
+                              (double)dest_width / src_width, (double)dest_height / src_height,
+                              filter_level, 255);
+              }
+            composite_times[src_index][dest_index][filter_level] =
+              stop_timing ("   composite\t\t", ITERS, dest_height * dest_width);
+
+            start_timing ();
+            for (i = 0; i < ITERS; i++)
+              {
+                pixops_composite_color (dest_buf, 0, 0, dest_width, dest_height, dest_rowstride, dest_channels, dest_has_alpha,
+                                        src_buf, src_width, src_height, src_rowstride, src_channels, src_has_alpha,
+                                        (double)dest_width / src_width, (double)dest_height / src_height,
+                                        filter_level, 255, 0, 0, 16, 0xaaaaaa, 0x555555);
+              }
+            composite_color_times[src_index][dest_index][filter_level] =
+              stop_timing ("   composite color\t", ITERS, dest_height * dest_width);
+
+            printf ("\n");
+          }
+        printf ("\n");
+
+        free (src_buf);
+        free (dest_buf);
+      }
+
+  printf ("SCALE\n=====\n\n");
+  dump_array (scale_times);
+
+  printf ("COMPOSITE\n=========\n\n");
+  dump_array (composite_times);
+
+  printf ("COMPOSITE_COLOR\n===============\n\n");
+  dump_array (composite_color_times);
+
+  return 0;
+}